From 0ed6d97ef230d71e51d954485d29927f3a07b989 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 16 Jun 2026 14:58:43 +0200 Subject: [PATCH 1/3] Add arm64 go feature id Adds the following: ```go // ARM64Level returns the ARMv8/ARMv9 architecture version supported by the CPU // as (major, minor), e.g. 8, 4 for ARMv8.4-A or 9, 0 for ARMv9.0-A. // Only mandatory user-space instruction features are considered, so the result // is the highest level whose required instructions are all present. // Returns 0, 0 on non-arm64 CPUs or when feature detection was unavailable. func (c CPUInfo) ARM64Level() (major, minor int) // GOARM64 returns a value usable as the GOARM64 build setting for the detected // CPU, e.g. "v8.4" or "v9.0,crypto". The ",crypto" suffix is appended when AES, // PMULL, SHA1 and SHA256 are all present; the ",lse" suffix is appended in the // rare case LSE is present without the rest of the ARMv8.1 feature set. // Returns "" on non-arm64 CPUs or when feature detection was unavailable. // See https://go.dev/wiki/MinimumRequirements#arm64 func (c CPUInfo) GOARM64() string ``` Fixes #159 --- .github/workflows/go.yml | 8 ++- cmd/cpuid/main.go | 5 ++ cpuid.go | 82 ++++++++++++++++++++++++++++- cpuid_test.go | 78 +++++++++++++++++++++++++++ detect_arm64.go | 8 +++ featureid_string.go | 111 ++++++++++++++++++++++----------------- go.mod | 4 +- go.sum | 4 +- os_darwin_arm64.go | 13 +++++ os_linux_arm64.go | 23 ++++++-- os_safe_linux_arm64.go | 1 + os_unsafe_linux_arm64.go | 3 ++ 12 files changed, 281 insertions(+), 59 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 0897f52..ea4dc27 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -29,7 +29,13 @@ jobs: - name: Test Noasm run: go test -tags=noasm ./... - + + - name: Run cpuid command + run: go run cmd/cpuid/main.go + + - name: Run cpuid command noasm + run: go run -tags=noasm cmd/cpuid/main.go + build-special: env: CGO_ENABLED: 0 diff --git a/cmd/cpuid/main.go b/cmd/cpuid/main.go index b99a7f7..e9c4be5 100644 --- a/cmd/cpuid/main.go +++ b/cmd/cpuid/main.go @@ -44,11 +44,13 @@ func main() { Features []string X64Level int RVProfile int + GOARM64 string `json:"GOARM64,omitempty"` }{ CPUInfo: cpuid.CPU, Features: cpuid.CPU.FeatureSet(), X64Level: cpuid.CPU.X64Level(), RVProfile: cpuid.CPU.RVProfile(), + GOARM64: cpuid.CPU.GOARM64(), } b, err := json.MarshalIndent(info, "", " ") if err != nil { @@ -73,6 +75,9 @@ func main() { if rvp := cpuid.CPU.RVProfile(); rvp > 0 { fmt.Printf("RISC-V Profile: RVA%d\n", rvp) } + if v := cpuid.CPU.GOARM64(); v != "" { + fmt.Println("GOARM64:", v) + } fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine) fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1I, "bytes") fmt.Println("L1 Data Cache:", cpuid.CPU.Cache.L1D, "bytes") diff --git a/cpuid.go b/cpuid.go index ec080e2..97cdaf6 100644 --- a/cpuid.go +++ b/cpuid.go @@ -318,6 +318,19 @@ const ( SM3 // SM3 instructions SM4 // SM4 instructions SVE // Scalable Vector Extension + SVE2 // Scalable Vector Extension 2 + SB // Speculation barrier (SB instruction) + SSBS // Speculative Store Bypass Safe (PSTATE.SSBS) + BTI // Branch Target Identification + FLAGM2 // Condition flag manipulation version 2 (AXFLAG, XAFLAG) + FRINTTS // Floating-point to integer rounding (FRINT32Z, FRINT64Z, etc) + DCPODP // Data cache clean to Point of Deep Persistence (DC CVADP) + BF16 // BFloat16 instructions (BFDOT, BFMMLA, etc) + I8MM // Int8 matrix multiplication (SMMLA, UMMLA, USMMLA) + WFXT // WFE/WFI with timeout (WFET, WFIT) + MOPS // Memory copy and set instructions (CPYF, SETP, etc) + HBC // Hinted conditional branches (BC.cond) + CSSC // Common short sequence compression (ABS, SMAX, UMAX, etc) // PMU PMU_FIXEDCOUNTER_CYCLES @@ -582,6 +595,29 @@ var rvZKSFeatures = CombineFeatures(RV_ZKSED, RV_ZKSH, RV_ZBKB, RV_ZBKC, RV_ZBKX var rvZVKNFeatures = CombineFeatures(RV_ZVKNED, RV_ZVKNHB, RV_ZVKG, RV_ZVKB, RV_ZVKT) var rvZVKSFeatures = CombineFeatures(RV_ZVKSED, RV_ZVKSH, RV_ZVKG, RV_ZVKB, RV_ZVKT) +// ARM64 architecture levels. armV8Levels[m] is the cumulative set of mandatory +// user-space instruction features added up to and including ARMv8.m that this +// package can detect. EL1/system-only features (PAN, VHE, CSV2/CSV3, ECV, ...) +// are excluded since they are irrelevant to user-space code generation, exactly +// as X64Level ignores non-instruction features. +// https://go.dev/wiki/MinimumRequirements#arm64 +var armV8Levels = [...]Features{ + CombineFeatures(FP, ASIMD), // v8.0 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM), // v8.1 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP), // v8.2 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC), // v8.3 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS), // v8.4 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, SSBS, BTI, FRINTTS, FLAGM2, DCPODP), // v8.5 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, SSBS, BTI, FRINTTS, FLAGM2, DCPODP, BF16, I8MM), // v8.6 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, SSBS, BTI, FRINTTS, FLAGM2, DCPODP, BF16, I8MM, WFXT), // v8.7 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, SSBS, BTI, FRINTTS, FLAGM2, DCPODP, BF16, I8MM, WFXT, MOPS, HBC), // v8.8 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, SSBS, BTI, FRINTTS, FLAGM2, DCPODP, BF16, I8MM, WFXT, MOPS, HBC, CSSC), // v8.9 +} + +// armCrypto matches the GOARM64 ",crypto" option: FEAT_AES, FEAT_PMULL, +// FEAT_SHA1 and FEAT_SHA256. +var armCrypto = CombineFeatures(AESARM, PMULL, SHA1, SHA2) + // X64Level returns the microarchitecture level detected on the CPU. // If features are lacking or non x64 mode, 0 is returned. // See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels @@ -606,7 +642,7 @@ func (c CPUInfo) X64Level() int { // RVProfile returns the RISC-V application profile level. // 0 = unknown / base ISA only, 20 = RVA20, 22 = RVA22, 23 = RVA23. -// Returns 0 on non-RISC-V architectures. +// Returns 0 on non-RISC-V architectures or if not detected. // https://github.com/riscv/riscv-profiles func (c CPUInfo) RVProfile() int { switch { @@ -621,6 +657,50 @@ func (c CPUInfo) RVProfile() int { } } +// ARM64Level returns the ARMv8/ARMv9 architecture version supported by the CPU +// as (major, minor), e.g. 8, 4 for ARMv8.4-A or 9, 0 for ARMv9.0-A. +// Only mandatory user-space instruction features are considered, so the result +// is the highest level whose required instructions are all present. +// Returns 0, 0 on non-arm64 CPUs or when feature detection was unavailable. +func (c CPUInfo) ARM64Level() (major, minor int) { + if !c.featureSet.hasSetP(armV8Levels[0]) { + return 0, 0 + } + m8 := 0 + for m := len(armV8Levels) - 1; m >= 1; m-- { + if c.featureSet.hasSetP(armV8Levels[m]) { + m8 = m + break + } + } + // ARMv9.x mandates everything in ARMv8.(x+5) plus SVE2. + if m8 >= 5 && c.featureSet.inSet(SVE2) { + return 9, m8 - 5 + } + return 8, m8 +} + +// GOARM64 returns a value usable as the GOARM64 build setting for the detected +// CPU, e.g. "v8.4" or "v9.0,crypto". The ",crypto" suffix is appended when AES, +// PMULL, SHA1 and SHA256 are all present; the ",lse" suffix is appended in the +// rare case LSE is present without the rest of the ARMv8.1 feature set. +// Returns "" on non-arm64 CPUs or when feature detection was unavailable. +// See https://go.dev/wiki/MinimumRequirements#arm64 +func (c CPUInfo) GOARM64() string { + major, minor := c.ARM64Level() + if major == 0 { + return "" + } + v := fmt.Sprintf("v%d.%d", major, minor) + if major == 8 && minor == 0 && c.featureSet.inSet(ATOMICS) { + v += ",lse" + } + if c.featureSet.hasSetP(armCrypto) { + v += ",crypto" + } + return v +} + // Disable will disable one or several features. func (c *CPUInfo) Disable(ids ...FeatureID) bool { for _, id := range ids { diff --git a/cpuid_test.go b/cpuid_test.go index f8b3e99..5221f9d 100644 --- a/cpuid_test.go +++ b/cpuid_test.go @@ -380,6 +380,84 @@ func TestHasOneOf(t *testing.T) { } } +func armCPU(ids ...FeatureID) CPUInfo { + var c CPUInfo + for _, id := range ids { + c.featureSet.set(id) + } + return c +} + +// addFeats returns a fresh slice of base plus add, avoiding append aliasing. +func addFeats(base []FeatureID, add ...FeatureID) []FeatureID { + out := make([]FeatureID, 0, len(base)+len(add)) + out = append(out, base...) + return append(out, add...) +} + +func TestARM64Level(t *testing.T) { + v80 := []FeatureID{FP, ASIMD} + v81 := addFeats(v80, ATOMICS, CRC32, ASIMDRDM) + v82 := addFeats(v81, DCPOP) + v83 := addFeats(v82, JSCVT, FCMA, LRCPC) + v84 := addFeats(v83, TS) + v85 := addFeats(v84, SB, SSBS, BTI, FRINTTS, FLAGM2, DCPODP) + v86 := addFeats(v85, BF16, I8MM) + + tests := []struct { + name string + ids []FeatureID + major, minor int + }{ + {"none", nil, 0, 0}, + {"fp-without-asimd", []FeatureID{FP}, 0, 0}, + {"v8.0", v80, 8, 0}, + {"v8.1", v81, 8, 1}, + {"v8.1-incomplete", addFeats(v80, ATOMICS, CRC32), 8, 0}, + {"v8.2", v82, 8, 2}, + {"v8.3", v83, 8, 3}, + {"v8.4", v84, 8, 4}, + {"v8.5", v85, 8, 5}, + {"v8.6", v86, 8, 6}, + {"v9.0", addFeats(v85, SVE2), 9, 0}, + {"v9.1", addFeats(v86, SVE2), 9, 1}, + {"sve2-without-v8.5", addFeats(v80, SVE2), 8, 0}, + } + for _, tc := range tests { + c := armCPU(tc.ids...) + if maj, min := c.ARM64Level(); maj != tc.major || min != tc.minor { + t.Errorf("%s: ARM64Level() = %d.%d, want %d.%d", tc.name, maj, min, tc.major, tc.minor) + } + } +} + +func TestGOARM64(t *testing.T) { + v80 := []FeatureID{FP, ASIMD} + v83 := addFeats(v80, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC) + v85 := addFeats(v83, TS, SB, SSBS, BTI, FRINTTS, FLAGM2, DCPODP) + + tests := []struct { + name string + ids []FeatureID + want string + }{ + {"none", nil, ""}, + {"v8.0", v80, "v8.0"}, + {"v8.0+lse", addFeats(v80, ATOMICS), "v8.0,lse"}, + {"v8.0+crypto", addFeats(v80, AESARM, PMULL, SHA1, SHA2), "v8.0,crypto"}, + {"v8.0+lse+crypto", []FeatureID{FP, ASIMD, ATOMICS, AESARM, PMULL, SHA1, SHA2}, "v8.0,lse,crypto"}, + {"v8.3", v83, "v8.3"}, + {"v8.3+crypto", addFeats(v83, AESARM, PMULL, SHA1, SHA2), "v8.3,crypto"}, + {"v9.0", addFeats(v85, SVE2), "v9.0"}, + } + for _, tc := range tests { + c := armCPU(tc.ids...) + if got := c.GOARM64(); got != tc.want { + t.Errorf("%s: GOARM64() = %q, want %q", tc.name, got, tc.want) + } + } +} + func TestParseISAString(t *testing.T) { tests := []struct { isa string diff --git a/detect_arm64.go b/detect_arm64.go index 8b36a39..e615b10 100644 --- a/detect_arm64.go +++ b/detect_arm64.go @@ -188,6 +188,7 @@ func addInfo(c *CPUInfo, safe bool) { f.setIf(instAttrReg0&(0xf<<60) != 0, RNDR) f.setIf(instAttrReg0&(0xf<<56) != 0, TLB) f.setIf(instAttrReg0&(0xf<<52) != 0, TS) + f.setIf(instAttrReg0&(0xf<<52) == 2<<52, FLAGM2) // TS == 0b0010 (FEAT_FlagM2) f.setIf(instAttrReg0&(0xf<<48) != 0, FHM) f.setIf(instAttrReg0&(0xf<<44) != 0, ASIMDDP) f.setIf(instAttrReg0&(0xf<<40) != 0, SM4) @@ -243,6 +244,13 @@ func addInfo(c *CPUInfo, safe bool) { // fmt.Println("APA") // } f.setIf(instAttrReg1&(0xf<<0) != 0, DCPOP) + f.setIf(instAttrReg1&(0xf<<0) == 2<<0, DCPODP) // DPB == 0b0010 (FEAT_DPB2) + + // Upper ID_AA64ISAR1_EL1 fields, not in the table above. + f.setIf(instAttrReg1&(0xf<<32) != 0, FRINTTS) // bits [35:32] + f.setIf(instAttrReg1&(0xf<<36) != 0, SB) // bits [39:36] + f.setIf(instAttrReg1&(0xf<<44) != 0, BF16) // bits [47:44] + f.setIf(instAttrReg1&(0xf<<52) != 0, I8MM) // bits [55:52] // Store c.featureSet.or(f) diff --git a/featureid_string.go b/featureid_string.go index 6bc752e..f88ead1 100644 --- a/featureid_string.go +++ b/featureid_string.go @@ -242,59 +242,72 @@ func _() { _ = x[SM3-232] _ = x[SM4-233] _ = x[SVE-234] - _ = x[PMU_FIXEDCOUNTER_CYCLES-235] - _ = x[PMU_FIXEDCOUNTER_REFCYCLES-236] - _ = x[PMU_FIXEDCOUNTER_INSTRUCTIONS-237] - _ = x[PMU_FIXEDCOUNTER_TOPDOWN_SLOTS-238] - _ = x[RV_IMA-239] - _ = x[RV_C-240] - _ = x[RV_F-241] - _ = x[RV_D-242] - _ = x[RV_V-243] - _ = x[RV_ZBA-244] - _ = x[RV_ZBB-245] - _ = x[RV_ZBC-246] - _ = x[RV_ZBS-247] - _ = x[RV_ZICOND-248] - _ = x[RV_ZIHINTPAUSE-249] - _ = x[RV_ZICBOM-250] - _ = x[RV_ZICBOZ-251] - _ = x[RV_ZICBOP-252] - _ = x[RV_ZFA-253] - _ = x[RV_ZFH-254] - _ = x[RV_ZFHMIN-255] - _ = x[RV_ZTSO-256] - _ = x[RV_ZACAS-257] - _ = x[RV_ZBKB-258] - _ = x[RV_ZBKC-259] - _ = x[RV_ZBKX-260] - _ = x[RV_ZKND-261] - _ = x[RV_ZKNE-262] - _ = x[RV_ZKNH-263] - _ = x[RV_ZKSED-264] - _ = x[RV_ZKSH-265] - _ = x[RV_ZKT-266] - _ = x[RV_ZKN-267] - _ = x[RV_ZKS-268] - _ = x[RV_ZVBB-269] - _ = x[RV_ZVBC-270] - _ = x[RV_ZVKB-271] - _ = x[RV_ZVKG-272] - _ = x[RV_ZVKNED-273] - _ = x[RV_ZVKNHA-274] - _ = x[RV_ZVKNHB-275] - _ = x[RV_ZVKSED-276] - _ = x[RV_ZVKSH-277] - _ = x[RV_ZVKT-278] - _ = x[RV_ZVKNG-279] - _ = x[RV_ZVKSG-280] - _ = x[lastID-281] + _ = x[SVE2-235] + _ = x[SB-236] + _ = x[SSBS-237] + _ = x[BTI-238] + _ = x[FLAGM2-239] + _ = x[FRINTTS-240] + _ = x[DCPODP-241] + _ = x[BF16-242] + _ = x[I8MM-243] + _ = x[WFXT-244] + _ = x[MOPS-245] + _ = x[HBC-246] + _ = x[CSSC-247] + _ = x[PMU_FIXEDCOUNTER_CYCLES-248] + _ = x[PMU_FIXEDCOUNTER_REFCYCLES-249] + _ = x[PMU_FIXEDCOUNTER_INSTRUCTIONS-250] + _ = x[PMU_FIXEDCOUNTER_TOPDOWN_SLOTS-251] + _ = x[RV_IMA-252] + _ = x[RV_C-253] + _ = x[RV_F-254] + _ = x[RV_D-255] + _ = x[RV_V-256] + _ = x[RV_ZBA-257] + _ = x[RV_ZBB-258] + _ = x[RV_ZBC-259] + _ = x[RV_ZBS-260] + _ = x[RV_ZICOND-261] + _ = x[RV_ZIHINTPAUSE-262] + _ = x[RV_ZICBOM-263] + _ = x[RV_ZICBOZ-264] + _ = x[RV_ZICBOP-265] + _ = x[RV_ZFA-266] + _ = x[RV_ZFH-267] + _ = x[RV_ZFHMIN-268] + _ = x[RV_ZTSO-269] + _ = x[RV_ZACAS-270] + _ = x[RV_ZBKB-271] + _ = x[RV_ZBKC-272] + _ = x[RV_ZBKX-273] + _ = x[RV_ZKND-274] + _ = x[RV_ZKNE-275] + _ = x[RV_ZKNH-276] + _ = x[RV_ZKSED-277] + _ = x[RV_ZKSH-278] + _ = x[RV_ZKT-279] + _ = x[RV_ZKN-280] + _ = x[RV_ZKS-281] + _ = x[RV_ZVBB-282] + _ = x[RV_ZVBC-283] + _ = x[RV_ZVKB-284] + _ = x[RV_ZVKG-285] + _ = x[RV_ZVKNED-286] + _ = x[RV_ZVKNHA-287] + _ = x[RV_ZVKNHB-288] + _ = x[RV_ZVKSED-289] + _ = x[RV_ZVKSH-290] + _ = x[RV_ZVKT-291] + _ = x[RV_ZVKNG-292] + _ = x[RV_ZVKSG-293] + _ = x[lastID-294] _ = x[firstID-0] } -const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXFP8AMXTILEAMXTF32AMXCOMPLEXAMXTRANSPOSEAPX_FAVXAVX10AVX10_128AVX10_256AVX10_512AVX2AVX512BF16AVX512BITALGAVX512BMMAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8AVXVNNIINT16BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FREDFSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBPB_BRTYPEIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBKEYLOCKERKEYLOCKERWLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSBPBSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSGXPQCSHASMESME_COHERENTSM3_X86SM4_X86SPEC_CTRL_SSBDSRBDS_CTRLSRSO_MSR_FIXSRSO_NOSRSO_USER_KERNEL_NOSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSA_L1_NOTSA_SQ_NOTSA_VERW_CLEARTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFHMFPFPHPGPAJSCVTLRCPCPMULLRNDRTLBTSSHA1SHA2SHA3SHA512SM3SM4SVEPMU_FIXEDCOUNTER_CYCLESPMU_FIXEDCOUNTER_REFCYCLESPMU_FIXEDCOUNTER_INSTRUCTIONSPMU_FIXEDCOUNTER_TOPDOWN_SLOTSRV_IMARV_CRV_FRV_DRV_VRV_ZBARV_ZBBRV_ZBCRV_ZBSRV_ZICONDRV_ZIHINTPAUSERV_ZICBOMRV_ZICBOZRV_ZICBOPRV_ZFARV_ZFHRV_ZFHMINRV_ZTSORV_ZACASRV_ZBKBRV_ZBKCRV_ZBKXRV_ZKNDRV_ZKNERV_ZKNHRV_ZKSEDRV_ZKSHRV_ZKTRV_ZKNRV_ZKSRV_ZVBBRV_ZVBCRV_ZVKBRV_ZVKGRV_ZVKNEDRV_ZVKNHARV_ZVKNHBRV_ZVKSEDRV_ZVKSHRV_ZVKTRV_ZVKNGRV_ZVKSGlastID" +const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXFP8AMXTILEAMXTF32AMXCOMPLEXAMXTRANSPOSEAPX_FAVXAVX10AVX10_128AVX10_256AVX10_512AVX2AVX512BF16AVX512BITALGAVX512BMMAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8AVXVNNIINT16BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FREDFSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBPB_BRTYPEIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBKEYLOCKERKEYLOCKERWLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSBPBSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSGXPQCSHASMESME_COHERENTSM3_X86SM4_X86SPEC_CTRL_SSBDSRBDS_CTRLSRSO_MSR_FIXSRSO_NOSRSO_USER_KERNEL_NOSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSA_L1_NOTSA_SQ_NOTSA_VERW_CLEARTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFHMFPFPHPGPAJSCVTLRCPCPMULLRNDRTLBTSSHA1SHA2SHA3SHA512SM3SM4SVESVE2SBSSBSBTIFLAGM2FRINTTSDCPODPBF16I8MMWFXTMOPSHBCCSSCPMU_FIXEDCOUNTER_CYCLESPMU_FIXEDCOUNTER_REFCYCLESPMU_FIXEDCOUNTER_INSTRUCTIONSPMU_FIXEDCOUNTER_TOPDOWN_SLOTSRV_IMARV_CRV_FRV_DRV_VRV_ZBARV_ZBBRV_ZBCRV_ZBSRV_ZICONDRV_ZIHINTPAUSERV_ZICBOMRV_ZICBOZRV_ZICBOPRV_ZFARV_ZFHRV_ZFHMINRV_ZTSORV_ZACASRV_ZBKBRV_ZBKCRV_ZBKXRV_ZKNDRV_ZKNERV_ZKNHRV_ZKSEDRV_ZKSHRV_ZKTRV_ZKNRV_ZKSRV_ZVBBRV_ZVBCRV_ZVKBRV_ZVKGRV_ZVKNEDRV_ZVKNHARV_ZVKNHBRV_ZVKSEDRV_ZVKSHRV_ZVKTRV_ZVKNGRV_ZVKSGlastID" -var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 61, 68, 75, 85, 97, 102, 105, 110, 119, 128, 137, 141, 151, 163, 172, 180, 188, 196, 204, 211, 221, 231, 239, 249, 260, 268, 278, 296, 311, 318, 330, 337, 344, 355, 367, 375, 379, 383, 389, 394, 402, 407, 413, 417, 426, 444, 452, 459, 463, 467, 481, 487, 491, 495, 504, 508, 512, 517, 522, 526, 530, 534, 541, 545, 548, 554, 557, 560, 570, 580, 593, 606, 610, 621, 625, 639, 656, 659, 669, 680, 686, 694, 705, 713, 725, 741, 755, 766, 776, 791, 799, 810, 820, 827, 836, 846, 850, 853, 860, 865, 876, 883, 890, 898, 901, 907, 912, 921, 928, 936, 940, 943, 949, 956, 969, 974, 976, 983, 990, 996, 1000, 1009, 1013, 1018, 1024, 1030, 1036, 1046, 1049, 1065, 1069, 1078, 1081, 1090, 1105, 1118, 1124, 1138, 1145, 1148, 1153, 1159, 1162, 1165, 1177, 1184, 1191, 1205, 1215, 1227, 1234, 1253, 1256, 1260, 1264, 1268, 1273, 1278, 1283, 1288, 1302, 1313, 1319, 1322, 1327, 1336, 1340, 1345, 1350, 1356, 1363, 1368, 1371, 1380, 1396, 1399, 1405, 1414, 1423, 1437, 1447, 1455, 1459, 1468, 1472, 1484, 1487, 1497, 1500, 1507, 1515, 1522, 1525, 1532, 1535, 1540, 1546, 1554, 1560, 1566, 1574, 1579, 1586, 1593, 1601, 1608, 1613, 1618, 1625, 1629, 1632, 1634, 1638, 1641, 1646, 1651, 1656, 1660, 1663, 1665, 1669, 1673, 1677, 1683, 1686, 1689, 1692, 1715, 1741, 1770, 1800, 1806, 1810, 1814, 1818, 1822, 1828, 1834, 1840, 1846, 1855, 1869, 1878, 1887, 1896, 1902, 1908, 1917, 1924, 1932, 1939, 1946, 1953, 1960, 1967, 1974, 1982, 1989, 1995, 2001, 2007, 2014, 2021, 2028, 2035, 2044, 2053, 2062, 2071, 2079, 2086, 2094, 2102, 2108} +var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 61, 68, 75, 85, 97, 102, 105, 110, 119, 128, 137, 141, 151, 163, 172, 180, 188, 196, 204, 211, 221, 231, 239, 249, 260, 268, 278, 296, 311, 318, 330, 337, 344, 355, 367, 375, 379, 383, 389, 394, 402, 407, 413, 417, 426, 444, 452, 459, 463, 467, 481, 487, 491, 495, 504, 508, 512, 517, 522, 526, 530, 534, 541, 545, 548, 554, 557, 560, 570, 580, 593, 606, 610, 621, 625, 639, 656, 659, 669, 680, 686, 694, 705, 713, 725, 741, 755, 766, 776, 791, 799, 810, 820, 827, 836, 846, 850, 853, 860, 865, 876, 883, 890, 898, 901, 907, 912, 921, 928, 936, 940, 943, 949, 956, 969, 974, 976, 983, 990, 996, 1000, 1009, 1013, 1018, 1024, 1030, 1036, 1046, 1049, 1065, 1069, 1078, 1081, 1090, 1105, 1118, 1124, 1138, 1145, 1148, 1153, 1159, 1162, 1165, 1177, 1184, 1191, 1205, 1215, 1227, 1234, 1253, 1256, 1260, 1264, 1268, 1273, 1278, 1283, 1288, 1302, 1313, 1319, 1322, 1327, 1336, 1340, 1345, 1350, 1356, 1363, 1368, 1371, 1380, 1396, 1399, 1405, 1414, 1423, 1437, 1447, 1455, 1459, 1468, 1472, 1484, 1487, 1497, 1500, 1507, 1515, 1522, 1525, 1532, 1535, 1540, 1546, 1554, 1560, 1566, 1574, 1579, 1586, 1593, 1601, 1608, 1613, 1618, 1625, 1629, 1632, 1634, 1638, 1641, 1646, 1651, 1656, 1660, 1663, 1665, 1669, 1673, 1677, 1683, 1686, 1689, 1692, 1696, 1698, 1702, 1705, 1711, 1718, 1724, 1728, 1732, 1736, 1740, 1743, 1747, 1770, 1796, 1825, 1855, 1861, 1865, 1869, 1873, 1877, 1883, 1889, 1895, 1901, 1910, 1924, 1933, 1942, 1951, 1957, 1963, 1972, 1979, 1987, 1994, 2001, 2008, 2015, 2022, 2029, 2037, 2044, 2050, 2056, 2062, 2069, 2076, 2083, 2090, 2099, 2108, 2117, 2126, 2134, 2141, 2149, 2157, 2163} func (i FeatureID) String() string { idx := int(i) - 0 diff --git a/go.mod b/go.mod index 84cac4d..75c8c16 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,5 @@ module github.com/klauspost/cpuid/v2 -go 1.22 +go 1.24.0 -require golang.org/x/sys v0.30.0 +require golang.org/x/sys v0.41.0 diff --git a/go.sum b/go.sum index 241f4ca..cdc9b1c 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,2 @@ -golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= -golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= +golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= diff --git a/os_darwin_arm64.go b/os_darwin_arm64.go index da07522..addbfc6 100644 --- a/os_darwin_arm64.go +++ b/os_darwin_arm64.go @@ -126,4 +126,17 @@ func tryToFillCPUInfoFomSysctl(c *CPUInfo) { setFeature(c, SM3, "hw.optional.arm.FEAT_SM3") // SM3 instructions setFeature(c, SM4, "hw.optional.arm.FEAT_SM4") // SM4 instructions setFeature(c, SVE, "hw.optional.arm.FEAT_SVE") // Scalable Vector Extension + setFeature(c, SVE2, "hw.optional.arm.FEAT_SVE2") // Scalable Vector Extension 2 + setFeature(c, SB, "hw.optional.arm.FEAT_SB") // Speculation barrier + setFeature(c, SSBS, "hw.optional.arm.FEAT_SSBS") // Speculative Store Bypass Safe + setFeature(c, BTI, "hw.optional.arm.FEAT_BTI") // Branch Target Identification + setFeature(c, FLAGM2, "hw.optional.arm.FEAT_FlagM2") // Condition flag manipulation version 2 + setFeature(c, FRINTTS, "hw.optional.arm.FEAT_FRINTTS") // Floating-point to integer rounding + setFeature(c, DCPODP, "hw.optional.arm.FEAT_DPB2") // Data cache clean to Point of Deep Persistence + setFeature(c, BF16, "hw.optional.arm.FEAT_BF16") // BFloat16 instructions + setFeature(c, I8MM, "hw.optional.arm.FEAT_I8MM") // Int8 matrix multiplication + setFeature(c, WFXT, "hw.optional.arm.FEAT_WFxT") // WFE/WFI with timeout + setFeature(c, MOPS, "hw.optional.arm.FEAT_MOPS") // Memory copy and set instructions + setFeature(c, HBC, "hw.optional.arm.FEAT_HBC") // Hinted conditional branches + setFeature(c, CSSC, "hw.optional.arm.FEAT_CSSC") // Common short sequence compression } diff --git a/os_linux_arm64.go b/os_linux_arm64.go index d96d244..73629e2 100644 --- a/os_linux_arm64.go +++ b/os_linux_arm64.go @@ -157,7 +157,7 @@ func detectOS(c *CPUInfo) bool { case _AT_HWCAP: hwcap = val case _AT_HWCAP2: - // Not used + hwcap2 = val } } if hwcap == 0 { @@ -184,9 +184,9 @@ func detectOS(c *CPUInfo) bool { c.featureSet.setIf(isSet(hwcap, hwcap_JSCVT), JSCVT) c.featureSet.setIf(isSet(hwcap, hwcap_LRCPC), LRCPC) c.featureSet.setIf(isSet(hwcap, hwcap_PMULL), PMULL) - c.featureSet.setIf(isSet(hwcap, hwcap2_RNG), RNDR) - // c.featureSet.setIf(isSet(hwcap, hwcap_), TLB) - // c.featureSet.setIf(isSet(hwcap, hwcap_), TS) + c.featureSet.setIf(isSet(hwcap2, hwcap2_RNG), RNDR) + // TLB (FEAT_TLBIOS/TLBIRANGE) has no HWCAP bit; only detectable via ID registers. + c.featureSet.setIf(isSet(hwcap, hwcap_FLAGM), TS) c.featureSet.setIf(isSet(hwcap, hwcap_SHA1), SHA1) c.featureSet.setIf(isSet(hwcap, hwcap_SHA2), SHA2) c.featureSet.setIf(isSet(hwcap, hwcap_SHA3), SHA3) @@ -194,6 +194,21 @@ func detectOS(c *CPUInfo) bool { c.featureSet.setIf(isSet(hwcap, hwcap_SM3), SM3) c.featureSet.setIf(isSet(hwcap, hwcap_SM4), SM4) c.featureSet.setIf(isSet(hwcap, hwcap_SVE), SVE) + c.featureSet.setIf(isSet(hwcap, hwcap_SB), SB) + c.featureSet.setIf(isSet(hwcap, hwcap_SSBS), SSBS) + + // Features reported through the second hardware capability word (HWCAP2). + c.featureSet.setIf(isSet(hwcap2, hwcap2_SVE2), SVE2) + c.featureSet.setIf(isSet(hwcap2, hwcap2_BTI), BTI) + c.featureSet.setIf(isSet(hwcap2, hwcap2_FLAGM2), FLAGM2) + c.featureSet.setIf(isSet(hwcap2, hwcap2_FRINT), FRINTTS) + c.featureSet.setIf(isSet(hwcap2, hwcap2_DCPODP), DCPODP) + c.featureSet.setIf(isSet(hwcap2, hwcap2_BF16), BF16) + c.featureSet.setIf(isSet(hwcap2, hwcap2_I8MM), I8MM) + c.featureSet.setIf(isSet(hwcap2, hwcap2_WFXT), WFXT) + c.featureSet.setIf(isSet(hwcap2, hwcap2_MOPS), MOPS) + c.featureSet.setIf(isSet(hwcap2, hwcap2_HBC), HBC) + c.featureSet.setIf(isSet(hwcap2, hwcap2_CSSC), CSSC) // The Samsung S9+ kernel reports support for atomics, but not all cores // actually support them, resulting in SIGILL. See issue #28431. diff --git a/os_safe_linux_arm64.go b/os_safe_linux_arm64.go index 5b4e8a1..c3608bd 100644 --- a/os_safe_linux_arm64.go +++ b/os_safe_linux_arm64.go @@ -5,3 +5,4 @@ package cpuid var hwcap uint +var hwcap2 uint diff --git a/os_unsafe_linux_arm64.go b/os_unsafe_linux_arm64.go index 00158c2..a389c0e 100644 --- a/os_unsafe_linux_arm64.go +++ b/os_unsafe_linux_arm64.go @@ -8,3 +8,6 @@ import _ "unsafe" // needed for go:linkname //go:linkname hwcap internal/cpu.HWCap var hwcap uint + +//go:linkname hwcap2 internal/cpu.HWCap2 +var hwcap2 uint From 1a453c3900291ad5b902e7c91f5c686a23755861 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 16 Jun 2026 15:53:14 +0200 Subject: [PATCH 2/3] Use alternative hwcap2, remove ssbs from level check. --- README.md | 101 ++++++++++++++++++++++----------------- cmd/cpuid/main.go | 4 +- cpuid.go | 27 +++++++---- cpuid_test.go | 1 + os_linux_arm64.go | 67 +++++++++++++------------- os_safe_linux_arm64.go | 1 - os_unsafe_linux_arm64.go | 3 -- 7 files changed, 112 insertions(+), 92 deletions(-) diff --git a/README.md b/README.md index 7269c30..a99bbdb 100644 --- a/README.md +++ b/README.md @@ -506,6 +506,19 @@ Exit Code 1 | SM3 | SM3 instructions | | SM4 | SM4 instructions | | SVE | Scalable Vector Extension | +| SVE2 | Scalable Vector Extension 2 | +| SB | Speculation barrier (SB instruction) | +| SSBS | Speculative Store Bypass Safe (PSTATE.SSBS) | +| BTI | Branch Target Identification | +| FLAGM2 | Condition flag manipulation version 2 (AXFLAG, XAFLAG) | +| FRINTTS | Floating-point to integer rounding (FRINT32Z, FRINT64Z, etc) | +| DCPODP | Data cache clean to Point of Deep Persistence (DC CVADP) | +| BF16 | BFloat16 instructions (BFDOT, BFMMLA, etc) | +| I8MM | Int8 matrix multiplication (SMMLA, UMMLA, USMMLA) | +| WFXT | WFE/WFI with timeout (WFET, WFIT) | +| MOPS | Memory copy and set instructions (CPYF, SETP, etc) | +| HBC | Hinted conditional branches (BC.cond) | +| CSSC | Common short sequence compression (ABS, SMAX, UMAX, etc) | ## riscv64 feature detection @@ -517,50 +530,50 @@ Other cache and topology information is not yet available. # RISC-V features: -| Feature Flag | Description | -|------------------|----------------------------------------------------------| -| RV_IMA | IMA base (Integer, Multiply, Atomic) | -| RV_C | Compressed instructions | -| RV_F | Single-precision FP | -| RV_D | Double-precision FP | -| RV_V | Vector extension (V) | -| RV_ZBA | Address generation | -| RV_ZBB | Basic bit manipulation | -| RV_ZBC | Carry-less multiplication | -| RV_ZBS | Single-bit manipulation | -| RV_ZICOND | Integer conditional operations | -| RV_ZIHINTPAUSE | Pause hint | -| RV_ZICBOM | Cache block management operations | -| RV_ZICBOZ | Cache block zero | -| RV_ZICBOP | Cache block prefetch | -| RV_ZFA | Additional floating-point | -| RV_ZFH | Half-precision FP | -| RV_ZFHMIN | Minimal half-precision FP | -| RV_ZTSO | Total store ordering | -| RV_ZACAS | Atomic CAS | -| RV_ZBKB | Bit-manipulation for crypto | -| RV_ZBKC | Carry-less multiply for crypto | -| RV_ZBKX | Crossbar permutations | -| RV_ZKND | NIST Suite: AES decrypt | -| RV_ZKNE | NIST Suite: AES encrypt | -| RV_ZKNH | NIST Suite: SHA-2 (SHA-256/SHA-512) | -| RV_ZKSED | ShangMi Suite: SM4 block cipher | -| RV_ZKSH | ShangMi Suite: SM3 hash | -| RV_ZKT | Data-independent execution latency (Crypto) | -| RV_ZKN | NIST Algorithm Suite (combined from individual) | -| RV_ZKS | ShangMi Algorithm Suite (combined from individual) | -| RV_ZVBB | Vector Basic Bit-manipulation | -| RV_ZVBC | Vector Carry-less multiply | -| RV_ZVKB | Vector Bit-manipulation for crypto | -| RV_ZVKG | Vector GCM/GMAC | -| RV_ZVKNED | NIST Suite: Vector AES encrypt+decrypt | -| RV_ZVKNHA | NIST Suite: Vector SHA-2 (SHA-256) | -| RV_ZVKNHB | NIST Suite: Vector SHA-2 (SHA-512) | -| RV_ZVKSED | ShangMi Suite: Vector SM4 | -| RV_ZVKSH | ShangMi Suite: Vector SM3 hash | -| RV_ZVKT | Vector Data-independent execution latency | -| RV_ZVKNG | NIST Suite with GCM (combined from individual) | -| RV_ZVKSG | ShangMi Suite with GCM (combined from individual) | +| Feature Flag | Description | +|----------------|----------------------------------------------------| +| RV_IMA | IMA base (Integer, Multiply, Atomic) | +| RV_C | Compressed instructions | +| RV_F | Single-precision FP | +| RV_D | Double-precision FP | +| RV_V | Vector extension (V) | +| RV_ZBA | Address generation | +| RV_ZBB | Basic bit manipulation | +| RV_ZBC | Carry-less multiplication | +| RV_ZBS | Single-bit manipulation | +| RV_ZICOND | Integer conditional operations | +| RV_ZIHINTPAUSE | Pause hint | +| RV_ZICBOM | Cache block management operations | +| RV_ZICBOZ | Cache block zero | +| RV_ZICBOP | Cache block prefetch | +| RV_ZFA | Additional floating-point | +| RV_ZFH | Half-precision FP | +| RV_ZFHMIN | Minimal half-precision FP | +| RV_ZTSO | Total store ordering | +| RV_ZACAS | Atomic CAS | +| RV_ZBKB | Bit-manipulation for crypto | +| RV_ZBKC | Carry-less multiply for crypto | +| RV_ZBKX | Crossbar permutations | +| RV_ZKND | NIST Suite: AES decrypt | +| RV_ZKNE | NIST Suite: AES encrypt | +| RV_ZKNH | NIST Suite: SHA-2 (SHA-256/SHA-512) | +| RV_ZKSED | ShangMi Suite: SM4 block cipher | +| RV_ZKSH | ShangMi Suite: SM3 hash | +| RV_ZKT | Data-independent execution latency (Crypto) | +| RV_ZKN | NIST Algorithm Suite (combined from individual) | +| RV_ZKS | ShangMi Algorithm Suite (combined from individual) | +| RV_ZVBB | Vector Basic Bit-manipulation | +| RV_ZVBC | Vector Carry-less multiply | +| RV_ZVKB | Vector Bit-manipulation for crypto | +| RV_ZVKG | Vector GCM/GMAC | +| RV_ZVKNED | NIST Suite: Vector AES encrypt+decrypt | +| RV_ZVKNHA | NIST Suite: Vector SHA-2 (SHA-256) | +| RV_ZVKNHB | NIST Suite: Vector SHA-2 (SHA-512) | +| RV_ZVKSED | ShangMi Suite: Vector SM4 | +| RV_ZVKSH | ShangMi Suite: Vector SM3 hash | +| RV_ZVKT | Vector Data-independent execution latency | +| RV_ZVKNG | NIST Suite with GCM (combined from individual) | +| RV_ZVKSG | ShangMi Suite with GCM (combined from individual) | # license diff --git a/cmd/cpuid/main.go b/cmd/cpuid/main.go index e9c4be5..5c29e70 100644 --- a/cmd/cpuid/main.go +++ b/cmd/cpuid/main.go @@ -68,7 +68,9 @@ func main() { fmt.Println("Logical Cores:", cpuid.CPU.LogicalCores) fmt.Println("CPU Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model, "Stepping:", cpuid.CPU.Stepping) fmt.Println("Features:", strings.Join(cpuid.CPU.FeatureSet(), ",")) - fmt.Println("Microarchitecture level:", cpuid.CPU.X64Level()) + if x := cpuid.CPU.X64Level(); x > 0 { + fmt.Println("Microarchitecture level:", x) + } if cpuid.CPU.AVX10Level > 0 { fmt.Println("AVX10 level:", cpuid.CPU.AVX10Level) } diff --git a/cpuid.go b/cpuid.go index 97cdaf6..7877afc 100644 --- a/cpuid.go +++ b/cpuid.go @@ -600,18 +600,25 @@ var rvZVKSFeatures = CombineFeatures(RV_ZVKSED, RV_ZVKSH, RV_ZVKG, RV_ZVKB, RV_Z // package can detect. EL1/system-only features (PAN, VHE, CSV2/CSV3, ECV, ...) // are excluded since they are irrelevant to user-space code generation, exactly // as X64Level ignores non-instruction features. +// +// FEAT_SSBS, although mandatory from ARMv8.5, is intentionally NOT required: it +// is a speculation-control feature that Go code generation never depends on, and +// it is reported unreliably. Linux ties HWCAP_SSBS to the SSB mitigation state +// and hypervisors frequently hide it from guests, so requiring it would cap +// otherwise-capable v8.5+/v9 CPUs (e.g. Neoverse N2) at v8.4. SSBS is still +// detected and reported through FeatureSet when present. // https://go.dev/wiki/MinimumRequirements#arm64 var armV8Levels = [...]Features{ - CombineFeatures(FP, ASIMD), // v8.0 - CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM), // v8.1 - CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP), // v8.2 - CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC), // v8.3 - CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS), // v8.4 - CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, SSBS, BTI, FRINTTS, FLAGM2, DCPODP), // v8.5 - CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, SSBS, BTI, FRINTTS, FLAGM2, DCPODP, BF16, I8MM), // v8.6 - CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, SSBS, BTI, FRINTTS, FLAGM2, DCPODP, BF16, I8MM, WFXT), // v8.7 - CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, SSBS, BTI, FRINTTS, FLAGM2, DCPODP, BF16, I8MM, WFXT, MOPS, HBC), // v8.8 - CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, SSBS, BTI, FRINTTS, FLAGM2, DCPODP, BF16, I8MM, WFXT, MOPS, HBC, CSSC), // v8.9 + CombineFeatures(FP, ASIMD), // v8.0 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM), // v8.1 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP), // v8.2 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC), // v8.3 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS), // v8.4 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, BTI, FRINTTS, FLAGM2, DCPODP), // v8.5 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, BTI, FRINTTS, FLAGM2, DCPODP, BF16, I8MM), // v8.6 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, BTI, FRINTTS, FLAGM2, DCPODP, BF16, I8MM, WFXT), // v8.7 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, BTI, FRINTTS, FLAGM2, DCPODP, BF16, I8MM, WFXT, MOPS, HBC), // v8.8 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, BTI, FRINTTS, FLAGM2, DCPODP, BF16, I8MM, WFXT, MOPS, HBC, CSSC), // v8.9 } // armCrypto matches the GOARM64 ",crypto" option: FEAT_AES, FEAT_PMULL, diff --git a/cpuid_test.go b/cpuid_test.go index 5221f9d..6ed8581 100644 --- a/cpuid_test.go +++ b/cpuid_test.go @@ -418,6 +418,7 @@ func TestARM64Level(t *testing.T) { {"v8.3", v83, 8, 3}, {"v8.4", v84, 8, 4}, {"v8.5", v85, 8, 5}, + {"v8.5-without-ssbs", addFeats(v84, SB, BTI, FRINTTS, FLAGM2, DCPODP), 8, 5}, {"v8.6", v86, 8, 6}, {"v9.0", addFeats(v85, SVE2), 9, 0}, {"v9.1", addFeats(v86, SVE2), 9, 1}, diff --git a/os_linux_arm64.go b/os_linux_arm64.go index 73629e2..ea9da40 100644 --- a/os_linux_arm64.go +++ b/os_linux_arm64.go @@ -115,15 +115,19 @@ const ( hwcap2_POE = 1 << 63 ) +// hwcap2 holds AT_HWCAP2. Unlike hwcap, the arm64 runtime does not expose it +// through internal/cpu, so detectOS reads it from the auxiliary vector. +var hwcap2 uint + func detectOS(c *CPUInfo) bool { // For now assuming no hyperthreading is reasonable. c.LogicalCores = runtime.NumCPU() c.PhysicalCores = c.LogicalCores c.ThreadsPerCore = 1 - if hwcap == 0 { - // We did not get values from the runtime. - // Try reading /proc/self/auxv - + // hwcap is provided by the runtime through the internal/cpu.HWCap linkname, + // but the runtime does not expose HWCAP2 on arm64. Read the auxiliary vector + // directly to obtain hwcap2 (and hwcap when the linkname is unavailable). + if hwcap == 0 || hwcap2 == 0 { // From https://github.com/golang/sys const ( _AT_HWCAP = 16 @@ -132,38 +136,35 @@ func detectOS(c *CPUInfo) bool { uintSize = int(32 << (^uint(0) >> 63)) ) - buf, err := ioutil.ReadFile("/proc/self/auxv") - if err != nil { - // e.g. on android /proc/self/auxv is not accessible, so silently - // ignore the error and leave Initialized = false. On some - // architectures (e.g. arm64) doinit() implements a fallback - // readout and will set Initialized = true again. - return false - } - bo := binary.LittleEndian - for len(buf) >= 2*(uintSize/8) { - var tag, val uint - switch uintSize { - case 32: - tag = uint(bo.Uint32(buf[0:])) - val = uint(bo.Uint32(buf[4:])) - buf = buf[8:] - case 64: - tag = uint(bo.Uint64(buf[0:])) - val = uint(bo.Uint64(buf[8:])) - buf = buf[16:] + // e.g. on android /proc/self/auxv is not accessible, so silently ignore + // the error and fall back to whatever the runtime provided. + if buf, err := ioutil.ReadFile("/proc/self/auxv"); err == nil { + bo := binary.LittleEndian + for len(buf) >= 2*(uintSize/8) { + var tag, val uint + switch uintSize { + case 32: + tag = uint(bo.Uint32(buf[0:])) + val = uint(bo.Uint32(buf[4:])) + buf = buf[8:] + case 64: + tag = uint(bo.Uint64(buf[0:])) + val = uint(bo.Uint64(buf[8:])) + buf = buf[16:] + } + switch tag { + case _AT_HWCAP: + hwcap = val + case _AT_HWCAP2: + hwcap2 = val + } } - switch tag { - case _AT_HWCAP: - hwcap = val - case _AT_HWCAP2: - hwcap2 = val - } - } - if hwcap == 0 { - return false } } + if hwcap == 0 { + // Nothing detected, e.g. on android or a restricted environment. + return false + } // HWCap was populated by the runtime from the auxiliary vector. // Use HWCap information since reading aarch64 system registers diff --git a/os_safe_linux_arm64.go b/os_safe_linux_arm64.go index c3608bd..5b4e8a1 100644 --- a/os_safe_linux_arm64.go +++ b/os_safe_linux_arm64.go @@ -5,4 +5,3 @@ package cpuid var hwcap uint -var hwcap2 uint diff --git a/os_unsafe_linux_arm64.go b/os_unsafe_linux_arm64.go index a389c0e..00158c2 100644 --- a/os_unsafe_linux_arm64.go +++ b/os_unsafe_linux_arm64.go @@ -8,6 +8,3 @@ import _ "unsafe" // needed for go:linkname //go:linkname hwcap internal/cpu.HWCap var hwcap uint - -//go:linkname hwcap2 internal/cpu.HWCap2 -var hwcap2 uint From d702c81a62b08327fbe089ba9d69e0f651016747 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 16 Jun 2026 16:17:08 +0200 Subject: [PATCH 3/3] Also remove FEAT_BTI from level detection. --- cpuid.go | 34 ++++++++++++++++++---------------- cpuid_test.go | 2 +- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/cpuid.go b/cpuid.go index 7877afc..39c9089 100644 --- a/cpuid.go +++ b/cpuid.go @@ -601,24 +601,26 @@ var rvZVKSFeatures = CombineFeatures(RV_ZVKSED, RV_ZVKSH, RV_ZVKG, RV_ZVKB, RV_Z // are excluded since they are irrelevant to user-space code generation, exactly // as X64Level ignores non-instruction features. // -// FEAT_SSBS, although mandatory from ARMv8.5, is intentionally NOT required: it -// is a speculation-control feature that Go code generation never depends on, and -// it is reported unreliably. Linux ties HWCAP_SSBS to the SSB mitigation state -// and hypervisors frequently hide it from guests, so requiring it would cap -// otherwise-capable v8.5+/v9 CPUs (e.g. Neoverse N2) at v8.4. SSBS is still -// detected and reported through FeatureSet when present. +// FEAT_SSBS and FEAT_BTI, although mandatory from ARMv8.5, are intentionally NOT +// required. Both are OS-policy-gated security features (speculative store bypass +// safety and branch-target identification) that Go code generation never depends +// on: their HWCAP/sysctl bits are set only when the OS or hypervisor enables the +// protection, not purely from CPU capability, so they are routinely hidden even +// on capable silicon (neither a Neoverse N2 Linux guest nor Apple Silicon reports +// them). Requiring them would cap such CPUs at v8.4. Both are still detected and +// reported through FeatureSet when present. // https://go.dev/wiki/MinimumRequirements#arm64 var armV8Levels = [...]Features{ - CombineFeatures(FP, ASIMD), // v8.0 - CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM), // v8.1 - CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP), // v8.2 - CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC), // v8.3 - CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS), // v8.4 - CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, BTI, FRINTTS, FLAGM2, DCPODP), // v8.5 - CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, BTI, FRINTTS, FLAGM2, DCPODP, BF16, I8MM), // v8.6 - CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, BTI, FRINTTS, FLAGM2, DCPODP, BF16, I8MM, WFXT), // v8.7 - CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, BTI, FRINTTS, FLAGM2, DCPODP, BF16, I8MM, WFXT, MOPS, HBC), // v8.8 - CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, BTI, FRINTTS, FLAGM2, DCPODP, BF16, I8MM, WFXT, MOPS, HBC, CSSC), // v8.9 + CombineFeatures(FP, ASIMD), // v8.0 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM), // v8.1 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP), // v8.2 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC), // v8.3 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS), // v8.4 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, FRINTTS, FLAGM2, DCPODP), // v8.5 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, FRINTTS, FLAGM2, DCPODP, BF16, I8MM), // v8.6 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, FRINTTS, FLAGM2, DCPODP, BF16, I8MM, WFXT), // v8.7 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, FRINTTS, FLAGM2, DCPODP, BF16, I8MM, WFXT, MOPS, HBC), // v8.8 + CombineFeatures(FP, ASIMD, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC, TS, SB, FRINTTS, FLAGM2, DCPODP, BF16, I8MM, WFXT, MOPS, HBC, CSSC), // v8.9 } // armCrypto matches the GOARM64 ",crypto" option: FEAT_AES, FEAT_PMULL, diff --git a/cpuid_test.go b/cpuid_test.go index 6ed8581..9ece8d5 100644 --- a/cpuid_test.go +++ b/cpuid_test.go @@ -418,7 +418,7 @@ func TestARM64Level(t *testing.T) { {"v8.3", v83, 8, 3}, {"v8.4", v84, 8, 4}, {"v8.5", v85, 8, 5}, - {"v8.5-without-ssbs", addFeats(v84, SB, BTI, FRINTTS, FLAGM2, DCPODP), 8, 5}, + {"v8.5-without-ssbs-bti", addFeats(v84, SB, FRINTTS, FLAGM2, DCPODP), 8, 5}, {"v8.6", v86, 8, 6}, {"v9.0", addFeats(v85, SVE2), 9, 0}, {"v9.1", addFeats(v86, SVE2), 9, 1},