Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,13 @@ jobs:

- name: Test Noasm
run: go test -tags=noasm ./...


- name: Run cpuid command
run: go run cmd/cpuid/main.go

- name: Run cpuid command noasm
run: go run -tags=noasm cmd/cpuid/main.go

build-special:
env:
CGO_ENABLED: 0
Expand Down
101 changes: 57 additions & 44 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,19 @@ Exit Code 1
| SM3 | SM3 instructions |
| SM4 | SM4 instructions |
| SVE | Scalable Vector Extension |
| SVE2 | Scalable Vector Extension 2 |
| SB | Speculation barrier (SB instruction) |
| SSBS | Speculative Store Bypass Safe (PSTATE.SSBS) |
| BTI | Branch Target Identification |
| FLAGM2 | Condition flag manipulation version 2 (AXFLAG, XAFLAG) |
| FRINTTS | Floating-point to integer rounding (FRINT32Z, FRINT64Z, etc) |
Comment thread
klauspost marked this conversation as resolved.
| DCPODP | Data cache clean to Point of Deep Persistence (DC CVADP) |
| BF16 | BFloat16 instructions (BFDOT, BFMMLA, etc) |
| I8MM | Int8 matrix multiplication (SMMLA, UMMLA, USMMLA) |
| WFXT | WFE/WFI with timeout (WFET, WFIT) |
| MOPS | Memory copy and set instructions (CPYF, SETP, etc) |
| HBC | Hinted conditional branches (BC.cond) |
| CSSC | Common short sequence compression (ABS, SMAX, UMAX, etc) |

## riscv64 feature detection

Expand All @@ -517,50 +530,50 @@ Other cache and topology information is not yet available.

# RISC-V features:

| Feature Flag | Description |
|------------------|----------------------------------------------------------|
| RV_IMA | IMA base (Integer, Multiply, Atomic) |
| RV_C | Compressed instructions |
| RV_F | Single-precision FP |
| RV_D | Double-precision FP |
| RV_V | Vector extension (V) |
| RV_ZBA | Address generation |
| RV_ZBB | Basic bit manipulation |
| RV_ZBC | Carry-less multiplication |
| RV_ZBS | Single-bit manipulation |
| RV_ZICOND | Integer conditional operations |
| RV_ZIHINTPAUSE | Pause hint |
| RV_ZICBOM | Cache block management operations |
| RV_ZICBOZ | Cache block zero |
| RV_ZICBOP | Cache block prefetch |
| RV_ZFA | Additional floating-point |
| RV_ZFH | Half-precision FP |
| RV_ZFHMIN | Minimal half-precision FP |
| RV_ZTSO | Total store ordering |
| RV_ZACAS | Atomic CAS |
| RV_ZBKB | Bit-manipulation for crypto |
| RV_ZBKC | Carry-less multiply for crypto |
| RV_ZBKX | Crossbar permutations |
| RV_ZKND | NIST Suite: AES decrypt |
| RV_ZKNE | NIST Suite: AES encrypt |
| RV_ZKNH | NIST Suite: SHA-2 (SHA-256/SHA-512) |
| RV_ZKSED | ShangMi Suite: SM4 block cipher |
| RV_ZKSH | ShangMi Suite: SM3 hash |
| RV_ZKT | Data-independent execution latency (Crypto) |
| RV_ZKN | NIST Algorithm Suite (combined from individual) |
| RV_ZKS | ShangMi Algorithm Suite (combined from individual) |
| RV_ZVBB | Vector Basic Bit-manipulation |
| RV_ZVBC | Vector Carry-less multiply |
| RV_ZVKB | Vector Bit-manipulation for crypto |
| RV_ZVKG | Vector GCM/GMAC |
| RV_ZVKNED | NIST Suite: Vector AES encrypt+decrypt |
| RV_ZVKNHA | NIST Suite: Vector SHA-2 (SHA-256) |
| RV_ZVKNHB | NIST Suite: Vector SHA-2 (SHA-512) |
| RV_ZVKSED | ShangMi Suite: Vector SM4 |
| RV_ZVKSH | ShangMi Suite: Vector SM3 hash |
| RV_ZVKT | Vector Data-independent execution latency |
| RV_ZVKNG | NIST Suite with GCM (combined from individual) |
| RV_ZVKSG | ShangMi Suite with GCM (combined from individual) |
| Feature Flag | Description |
|----------------|----------------------------------------------------|
| RV_IMA | IMA base (Integer, Multiply, Atomic) |
| RV_C | Compressed instructions |
| RV_F | Single-precision FP |
| RV_D | Double-precision FP |
| RV_V | Vector extension (V) |
| RV_ZBA | Address generation |
| RV_ZBB | Basic bit manipulation |
| RV_ZBC | Carry-less multiplication |
| RV_ZBS | Single-bit manipulation |
| RV_ZICOND | Integer conditional operations |
| RV_ZIHINTPAUSE | Pause hint |
| RV_ZICBOM | Cache block management operations |
| RV_ZICBOZ | Cache block zero |
| RV_ZICBOP | Cache block prefetch |
| RV_ZFA | Additional floating-point |
| RV_ZFH | Half-precision FP |
| RV_ZFHMIN | Minimal half-precision FP |
| RV_ZTSO | Total store ordering |
| RV_ZACAS | Atomic CAS |
| RV_ZBKB | Bit-manipulation for crypto |
| RV_ZBKC | Carry-less multiply for crypto |
| RV_ZBKX | Crossbar permutations |
| RV_ZKND | NIST Suite: AES decrypt |
| RV_ZKNE | NIST Suite: AES encrypt |
| RV_ZKNH | NIST Suite: SHA-2 (SHA-256/SHA-512) |
| RV_ZKSED | ShangMi Suite: SM4 block cipher |
| RV_ZKSH | ShangMi Suite: SM3 hash |
| RV_ZKT | Data-independent execution latency (Crypto) |
| RV_ZKN | NIST Algorithm Suite (combined from individual) |
| RV_ZKS | ShangMi Algorithm Suite (combined from individual) |
| RV_ZVBB | Vector Basic Bit-manipulation |
| RV_ZVBC | Vector Carry-less multiply |
| RV_ZVKB | Vector Bit-manipulation for crypto |
| RV_ZVKG | Vector GCM/GMAC |
| RV_ZVKNED | NIST Suite: Vector AES encrypt+decrypt |
| RV_ZVKNHA | NIST Suite: Vector SHA-2 (SHA-256) |
| RV_ZVKNHB | NIST Suite: Vector SHA-2 (SHA-512) |
| RV_ZVKSED | ShangMi Suite: Vector SM4 |
| RV_ZVKSH | ShangMi Suite: Vector SM3 hash |
| RV_ZVKT | Vector Data-independent execution latency |
| RV_ZVKNG | NIST Suite with GCM (combined from individual) |
| RV_ZVKSG | ShangMi Suite with GCM (combined from individual) |

# license

Expand Down
9 changes: 8 additions & 1 deletion cmd/cpuid/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,13 @@ func main() {
Features []string
X64Level int
RVProfile int
GOARM64 string `json:"GOARM64,omitempty"`
}{
CPUInfo: cpuid.CPU,
Features: cpuid.CPU.FeatureSet(),
X64Level: cpuid.CPU.X64Level(),
RVProfile: cpuid.CPU.RVProfile(),
GOARM64: cpuid.CPU.GOARM64(),
}
b, err := json.MarshalIndent(info, "", " ")
if err != nil {
Expand All @@ -66,13 +68,18 @@ func main() {
fmt.Println("Logical Cores:", cpuid.CPU.LogicalCores)
fmt.Println("CPU Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model, "Stepping:", cpuid.CPU.Stepping)
fmt.Println("Features:", strings.Join(cpuid.CPU.FeatureSet(), ","))
fmt.Println("Microarchitecture level:", cpuid.CPU.X64Level())
if x := cpuid.CPU.X64Level(); x > 0 {
fmt.Println("Microarchitecture level:", x)
}
if cpuid.CPU.AVX10Level > 0 {
fmt.Println("AVX10 level:", cpuid.CPU.AVX10Level)
}
if rvp := cpuid.CPU.RVProfile(); rvp > 0 {
fmt.Printf("RISC-V Profile: RVA%d\n", rvp)
}
if v := cpuid.CPU.GOARM64(); v != "" {
fmt.Println("GOARM64:", v)
}
fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine)
fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1I, "bytes")
fmt.Println("L1 Data Cache:", cpuid.CPU.Cache.L1D, "bytes")
Expand Down
91 changes: 90 additions & 1 deletion cpuid.go
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,19 @@ const (
SM3 // SM3 instructions
SM4 // SM4 instructions
SVE // Scalable Vector Extension
SVE2 // Scalable Vector Extension 2
SB // Speculation barrier (SB instruction)
SSBS // Speculative Store Bypass Safe (PSTATE.SSBS)
BTI // Branch Target Identification
FLAGM2 // Condition flag manipulation version 2 (AXFLAG, XAFLAG)
FRINTTS // Floating-point to integer rounding (FRINT32Z, FRINT64Z, etc)
DCPODP // Data cache clean to Point of Deep Persistence (DC CVADP)
BF16 // BFloat16 instructions (BFDOT, BFMMLA, etc)
I8MM // Int8 matrix multiplication (SMMLA, UMMLA, USMMLA)
WFXT // WFE/WFI with timeout (WFET, WFIT)
MOPS // Memory copy and set instructions (CPYF, SETP, etc)
HBC // Hinted conditional branches (BC.cond)
CSSC // Common short sequence compression (ABS, SMAX, UMAX, etc)

// PMU
PMU_FIXEDCOUNTER_CYCLES
Expand Down Expand Up @@ -582,6 +595,38 @@ var rvZKSFeatures = CombineFeatures(RV_ZKSED, RV_ZKSH, RV_ZBKB, RV_ZBKC, RV_ZBKX
var rvZVKNFeatures = CombineFeatures(RV_ZVKNED, RV_ZVKNHB, RV_ZVKG, RV_ZVKB, RV_ZVKT)
var rvZVKSFeatures = CombineFeatures(RV_ZVKSED, RV_ZVKSH, RV_ZVKG, RV_ZVKB, RV_ZVKT)

var (
	// armV8Levels holds the ARM64 architecture levels: armV8Levels[m] is the
	// cumulative set of mandatory user-space instruction features added up to
	// and including ARMv8.m that this package can detect. EL1/system-only
	// features (PAN, VHE, CSV2/CSV3, ECV, ...) are left out because they are
	// irrelevant to user-space code generation, exactly as X64Level ignores
	// non-instruction features.
	//
	// FEAT_SSBS and FEAT_BTI are mandatory from ARMv8.5 but are intentionally
	// NOT required. Both are OS-policy-gated security features (speculative
	// store bypass safety and branch-target identification) that Go code
	// generation never depends on: their HWCAP/sysctl bits are set only when
	// the OS or hypervisor enables the protection, not purely from CPU
	// capability, so they are routinely hidden even on capable silicon
	// (neither a Neoverse N2 Linux guest nor Apple Silicon reports them).
	// Requiring them would cap such CPUs at v8.4. Both are still detected and
	// reported through FeatureSet when present.
	// https://go.dev/wiki/MinimumRequirements#arm64
	armV8Levels = func() [10]Features {
		var levels [10]Features

		// Features that each minor version adds on top of the previous one.
		added := [len(levels)][]FeatureID{
			{FP, ASIMD},                   // v8.0
			{ATOMICS, CRC32, ASIMDRDM},    // v8.1
			{DCPOP},                       // v8.2
			{JSCVT, FCMA, LRCPC},          // v8.3
			{TS},                          // v8.4
			{SB, FRINTTS, FLAGM2, DCPODP}, // v8.5
			{BF16, I8MM},                  // v8.6
			{WFXT},                        // v8.7
			{MOPS, HBC},                   // v8.8
			{CSSC},                        // v8.9
		}

		// Every level requires all features of the levels below it.
		var cumulative []FeatureID
		for m, feats := range added {
			cumulative = append(cumulative, feats...)
			levels[m] = CombineFeatures(cumulative...)
		}
		return levels
	}()

	// armCrypto matches the GOARM64 ",crypto" option: FEAT_AES, FEAT_PMULL,
	// FEAT_SHA1 and FEAT_SHA256.
	armCrypto = CombineFeatures(AESARM, PMULL, SHA1, SHA2)
)

// X64Level returns the microarchitecture level detected on the CPU.
// If features are lacking or non x64 mode, 0 is returned.
// See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
Expand All @@ -606,7 +651,7 @@ func (c CPUInfo) X64Level() int {

// RVProfile returns the RISC-V application profile level.
// 0 = unknown / base ISA only, 20 = RVA20, 22 = RVA22, 23 = RVA23.
// Returns 0 on non-RISC-V architectures.
// Returns 0 on non-RISC-V architectures or if not detected.
// https://github.com/riscv/riscv-profiles
func (c CPUInfo) RVProfile() int {
switch {
Expand All @@ -621,6 +666,50 @@ func (c CPUInfo) RVProfile() int {
}
}

// ARM64Level reports the ARMv8/ARMv9 architecture version supported by the CPU
// as a (major, minor) pair, e.g. 8, 4 for ARMv8.4-A or 9, 0 for ARMv9.0-A.
//
// The minor version is the highest index of armV8Levels whose features are
// all present, so only the mandatory user-space instruction features listed
// there are considered. A result of ARMv8.5 or later is reported as
// ARMv9.(minor-5) when SVE2 is present as well.
//
// 0, 0 is returned when the ARMv8.0 baseline is missing, i.e. on non-arm64
// CPUs or when feature detection was unavailable.
func (c CPUInfo) ARM64Level() (major, minor int) {
	if !c.featureSet.hasSetP(armV8Levels[0]) {
		return 0, 0
	}

	// Remember the highest level whose complete feature set is present.
	level := 0
	for m := 1; m < len(armV8Levels); m++ {
		if c.featureSet.hasSetP(armV8Levels[m]) {
			level = m
		}
	}

	// ARMv9.x mandates everything in ARMv8.(x+5) plus SVE2; anything short of
	// that stays on the ARMv8 scale.
	if level < 5 || !c.featureSet.inSet(SVE2) {
		return 8, level
	}
	return 9, level - 5
}

// GOARM64 returns a string usable as the GOARM64 build setting for the
// detected CPU, e.g. "v8.4" or "v9.0,crypto".
//
// The version part comes from ARM64Level. ",lse" is appended only for a v8.0
// result that nevertheless has ATOMICS, i.e. the rare case where LSE is
// present without the rest of the ARMv8.1 feature set. ",crypto" is appended
// when AES, PMULL, SHA1 and SHA256 are all present.
//
// "" is returned on non-arm64 CPUs or when feature detection was unavailable.
// See https://go.dev/wiki/MinimumRequirements#arm64
func (c CPUInfo) GOARM64() string {
	major, minor := c.ARM64Level()
	if major == 0 {
		return ""
	}

	var lse, crypto string
	if major == 8 && minor == 0 && c.featureSet.inSet(ATOMICS) {
		lse = ",lse"
	}
	if c.featureSet.hasSetP(armCrypto) {
		crypto = ",crypto"
	}
	return fmt.Sprintf("v%d.%d%s%s", major, minor, lse, crypto)
}

// Disable will disable one or several features.
func (c *CPUInfo) Disable(ids ...FeatureID) bool {
for _, id := range ids {
Expand Down
79 changes: 79 additions & 0 deletions cpuid_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,85 @@ func TestHasOneOf(t *testing.T) {
}
}

// armCPU builds a zero-valued CPUInfo and sets exactly the given features on
// its feature set.
func armCPU(ids ...FeatureID) CPUInfo {
	cpu := CPUInfo{}
	for i := range ids {
		cpu.featureSet.set(ids[i])
	}
	return cpu
}

// addFeats returns a newly allocated slice holding base followed by add. The
// result never shares a backing array with base, so sibling calls on the same
// base cannot overwrite each other through append.
func addFeats(base []FeatureID, add ...FeatureID) []FeatureID {
	merged := make([]FeatureID, len(base), len(base)+len(add))
	copy(merged, base)
	return append(merged, add...)
}

// TestARM64Level checks the (major, minor) pair reported by ARM64Level for
// synthetic CPUs carrying hand-picked feature sets.
func TestARM64Level(t *testing.T) {
	// Cumulative feature lists, one per architecture level.
	lvl0 := []FeatureID{FP, ASIMD}
	lvl1 := addFeats(lvl0, ATOMICS, CRC32, ASIMDRDM)
	lvl2 := addFeats(lvl1, DCPOP)
	lvl3 := addFeats(lvl2, JSCVT, FCMA, LRCPC)
	lvl4 := addFeats(lvl3, TS)
	lvl5 := addFeats(lvl4, SB, SSBS, BTI, FRINTTS, FLAGM2, DCPODP)
	lvl6 := addFeats(lvl5, BF16, I8MM)

	type levelCase struct {
		label     string
		feats     []FeatureID
		wantMajor int
		wantMinor int
	}
	cases := []levelCase{
		{"none", nil, 0, 0},
		{"fp-without-asimd", []FeatureID{FP}, 0, 0},
		{"v8.0", lvl0, 8, 0},
		{"v8.1", lvl1, 8, 1},
		{"v8.1-incomplete", addFeats(lvl0, ATOMICS, CRC32), 8, 0},
		{"v8.2", lvl2, 8, 2},
		{"v8.3", lvl3, 8, 3},
		{"v8.4", lvl4, 8, 4},
		{"v8.5", lvl5, 8, 5},
		{"v8.5-without-ssbs-bti", addFeats(lvl4, SB, FRINTTS, FLAGM2, DCPODP), 8, 5},
		{"v8.6", lvl6, 8, 6},
		{"v9.0", addFeats(lvl5, SVE2), 9, 0},
		{"v9.1", addFeats(lvl6, SVE2), 9, 1},
		{"sve2-without-v8.5", addFeats(lvl0, SVE2), 8, 0},
	}

	for i := range cases {
		tc := &cases[i]
		gotMajor, gotMinor := armCPU(tc.feats...).ARM64Level()
		if gotMajor == tc.wantMajor && gotMinor == tc.wantMinor {
			continue
		}
		t.Errorf("%s: ARM64Level() = %d.%d, want %d.%d", tc.label, gotMajor, gotMinor, tc.wantMajor, tc.wantMinor)
	}
}

// TestGOARM64 checks the GOARM64 string, including the ",lse" and ",crypto"
// suffixes, for synthetic CPUs carrying hand-picked feature sets.
func TestGOARM64(t *testing.T) {
	lvl0 := []FeatureID{FP, ASIMD}
	lvl3 := addFeats(lvl0, ATOMICS, CRC32, ASIMDRDM, DCPOP, JSCVT, FCMA, LRCPC)
	lvl5 := addFeats(lvl3, TS, SB, SSBS, BTI, FRINTTS, FLAGM2, DCPODP)

	type settingCase struct {
		label    string
		feats    []FeatureID
		expected string
	}
	cases := []settingCase{
		{"none", nil, ""},
		{"v8.0", lvl0, "v8.0"},
		{"v8.0+lse", addFeats(lvl0, ATOMICS), "v8.0,lse"},
		{"v8.0+crypto", addFeats(lvl0, AESARM, PMULL, SHA1, SHA2), "v8.0,crypto"},
		{"v8.0+lse+crypto", []FeatureID{FP, ASIMD, ATOMICS, AESARM, PMULL, SHA1, SHA2}, "v8.0,lse,crypto"},
		{"v8.3", lvl3, "v8.3"},
		{"v8.3+crypto", addFeats(lvl3, AESARM, PMULL, SHA1, SHA2), "v8.3,crypto"},
		{"v9.0", addFeats(lvl5, SVE2), "v9.0"},
	}

	for i := range cases {
		tc := &cases[i]
		actual := armCPU(tc.feats...).GOARM64()
		if actual == tc.expected {
			continue
		}
		t.Errorf("%s: GOARM64() = %q, want %q", tc.label, actual, tc.expected)
	}
}

func TestParseISAString(t *testing.T) {
tests := []struct {
isa string
Expand Down
8 changes: 8 additions & 0 deletions detect_arm64.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ func addInfo(c *CPUInfo, safe bool) {
f.setIf(instAttrReg0&(0xf<<60) != 0, RNDR)
f.setIf(instAttrReg0&(0xf<<56) != 0, TLB)
f.setIf(instAttrReg0&(0xf<<52) != 0, TS)
f.setIf(instAttrReg0&(0xf<<52) == 2<<52, FLAGM2) // TS == 0b0010 (FEAT_FlagM2)
f.setIf(instAttrReg0&(0xf<<48) != 0, FHM)
f.setIf(instAttrReg0&(0xf<<44) != 0, ASIMDDP)
f.setIf(instAttrReg0&(0xf<<40) != 0, SM4)
Expand Down Expand Up @@ -243,6 +244,13 @@ func addInfo(c *CPUInfo, safe bool) {
// fmt.Println("APA")
// }
f.setIf(instAttrReg1&(0xf<<0) != 0, DCPOP)
f.setIf(instAttrReg1&(0xf<<0) == 2<<0, DCPODP) // DPB == 0b0010 (FEAT_DPB2)

// Upper ID_AA64ISAR1_EL1 fields, not in the table above.
f.setIf(instAttrReg1&(0xf<<32) != 0, FRINTTS) // bits [35:32]
f.setIf(instAttrReg1&(0xf<<36) != 0, SB) // bits [39:36]
f.setIf(instAttrReg1&(0xf<<44) != 0, BF16) // bits [47:44]
f.setIf(instAttrReg1&(0xf<<52) != 0, I8MM) // bits [55:52]

// Store
c.featureSet.or(f)
Expand Down
Loading
Loading