spicetools/external/cpu_features/include/cpuinfo_aarch64.h

// Copyright 2017 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

////////////////////////////////////////////////////////////////////////////////
// A note on Windows AArch64 implementation
////////////////////////////////////////////////////////////////////////////////

// Getting cpu info via EL1 system registers is not possible, so we delegate it
// to the Windows API (i.e., IsProcessorFeaturePresent and GetNativeSystemInfo).
// The `implementer`, `variant` and `part` fields of the `Aarch64Info` struct
// are not used, so they are set to 0. To get `revision` we use
// `wProcessorRevision` from `SYSTEM_INFO`.
//
// Cryptographic Extension:
// -----------------------------------------------------------------------------
// According to documentation Arm Architecture Reference Manual for
// A-profile architecture. A2.3 The Armv8 Cryptographic Extension. The Armv8.0
// Cryptographic Extension provides instructions for the acceleration of
// encryption and decryption, and includes the following features: FEAT_AES,
// FEAT_PMULL, FEAT_SHA1, FEAT_SHA256.
// see: https://developer.arm.com/documentation/ddi0487/latest
//
// We use `PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE` to detect all Armv8.0 crypto
// features. This value reports all features or nothing, so even if you only
// have support FEAT_AES and FEAT_PMULL, it will still return false.
//
// From Armv8.2, an implementation of the Armv8.0 Cryptographic Extension can
// include either or both of:
//
// • The AES functionality, including support for multiplication of 64-bit
//   polynomials. The ID_AA64ISAR0_EL1.AES field indicates whether this
//   functionality is supported.
// • The SHA1 and SHA2-256 functionality. The ID_AA64ISAR0_EL1.{SHA2, SHA1}
//   fields indicate whether this functionality is supported.
//
// ID_AA64ISAR0_EL1.AES, bits [7:4]:
// Indicates support for AES instructions in AArch64 state. Defined values are:
// - 0b0000 No AES instructions implemented.
// - 0b0001 AESE, AESD, AESMC, and AESIMC instructions implemented.
// - 0b0010 As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit
//   data quantities.
//
// FEAT_AES implements the functionality identified by the value 0b0001.
// FEAT_PMULL implements the functionality identified by the value 0b0010.
// From Armv8, the permitted values are 0b0000 and 0b0010.
//
// ID_AA64ISAR0_EL1.SHA1, bits [11:8]:
// Indicates support for SHA1 instructions in AArch64 state. Defined values are:
// - 0b0000 No SHA1 instructions implemented.
// - 0b0001 SHA1C, SHA1P, SHA1M, SHA1H, SHA1SU0, and SHA1SU1 instructions
//   implemented.
//
// FEAT_SHA1 implements the functionality identified by the value 0b0001.
// From Armv8, the permitted values are 0b0000 and 0b0001.
// If the value of ID_AA64ISAR0_EL1.SHA2 is 0b0000, this field must have the
// value 0b0000.
//
// ID_AA64ISAR0_EL1.SHA2, bits [15:12]:
// Indicates support for SHA2 instructions in AArch64 state. Defined values are:
// - 0b0000 No SHA2 instructions implemented.
// - 0b0001 Implements instructions: SHA256H, SHA256H2, SHA256SU0, and
//   SHA256SU1.
// - 0b0010 Implements instructions:
//          • SHA256H, SHA256H2, SHA256SU0, and SHA256SU1.
//          • SHA512H, SHA512H2, SHA512SU0, and SHA512SU1.
//
// FEAT_SHA256 implements the functionality identified by the value 0b0001.
// FEAT_SHA512 implements the functionality identified by the value 0b0010.
//
// In Armv8, the permitted values are 0b0000 and 0b0001.
// From Armv8.2, the permitted values are 0b0000, 0b0001, and 0b0010.
//
// If the value of ID_AA64ISAR0_EL1.SHA1 is 0b0000, this field must have the
// value 0b0000.
//
// If the value of this field is 0b0010, ID_AA64ISAR0_EL1.SHA3
// must have the value 0b0001.
//
// Other cryptographic features that we cannot detect such as sha512, sha3, sm3,
// sm4, sveaes, svepmull, svesha3, svesm4 we set to 0.
//
// FP/SIMD:
// -----------------------------------------------------------------------------
// FP/SIMD must be implemented on all Armv8.0 implementations, but
// implementations targeting specialized markets may support the following
// combinations:
//
// • No NEON or floating-point.
// • Full floating-point and SIMD support with exception trapping.
// • Full floating-point and SIMD support without exception trapping.
//
// ref:
// https://developer.arm.com/documentation/den0024/a/AArch64-Floating-point-and-NEON
//
// So, we use `PF_ARM_VFP_32_REGISTERS_AVAILABLE`,
// `PF_ARM_NEON_INSTRUCTIONS_AVAILABLE` to detect `asimd` and `fp`

#ifndef CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_
#define CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_

#include "cpu_features_cache_info.h"
#include "cpu_features_macros.h"

CPU_FEATURES_START_CPP_NAMESPACE

typedef struct {
  int fp : 1;          // Floating-point.
  int asimd : 1;       // Advanced SIMD.
  int evtstrm : 1;     // Generic timer generated events.
  int aes : 1;         // Hardware-accelerated Advanced Encryption Standard.
  int pmull : 1;       // Polynomial multiply long.
  int sha1 : 1;        // Hardware-accelerated SHA1.
  int sha2 : 1;        // Hardware-accelerated SHA2-256.
  int crc32 : 1;       // Hardware-accelerated CRC-32.
  int atomics : 1;     // Armv8.1 atomic instructions.
  int fphp : 1;        // Half-precision floating point support.
  int asimdhp : 1;     // Advanced SIMD half-precision support.
  int cpuid : 1;       // Access to certain ID registers.
  int asimdrdm : 1;    // Rounding Double Multiply Accumulate/Subtract.
  int jscvt : 1;       // Support for JavaScript conversion.
  int fcma : 1;        // Floating point complex numbers.
  int lrcpc : 1;       // Support for weaker release consistency.
  int dcpop : 1;       // Data persistence writeback.
  int sha3 : 1;        // Hardware-accelerated SHA3.
  int sm3 : 1;         // Hardware-accelerated SM3.
  int sm4 : 1;         // Hardware-accelerated SM4.
  int asimddp : 1;     // Dot product instruction.
  int sha512 : 1;      // Hardware-accelerated SHA512.
  int sve : 1;         // Scalable Vector Extension.
  int asimdfhm : 1;    // Additional half-precision instructions.
  int dit : 1;         // Data independent timing.
  int uscat : 1;       // Unaligned atomics support.
  int ilrcpc : 1;      // Additional support for weaker release consistency.
  int flagm : 1;       // Flag manipulation instructions.
  int ssbs : 1;        // Speculative Store Bypass Safe PSTATE bit.
  int sb : 1;          // Speculation barrier.
  int paca : 1;        // Address authentication.
  int pacg : 1;        // Generic authentication.
  int dcpodp : 1;      // Data cache clean to point of persistence.
  int sve2 : 1;        // Scalable Vector Extension (version 2).
  int sveaes : 1;      // SVE AES instructions.
  int svepmull : 1;    // SVE polynomial multiply long instructions.
  int svebitperm : 1;  // SVE bit permute instructions.
  int svesha3 : 1;     // SVE SHA3 instructions.
  int svesm4 : 1;      // SVE SM4 instructions.
  int flagm2 : 1;      // Additional flag manipulation instructions.
  int frint : 1;       // Floating point to integer rounding.
  int svei8mm : 1;     // SVE Int8 matrix multiplication instructions.
  int svef32mm : 1;    // SVE FP32 matrix multiplication instruction.
  int svef64mm : 1;    // SVE FP64 matrix multiplication instructions.
  int svebf16 : 1;     // SVE BFloat16 instructions.
  int i8mm : 1;        // Int8 matrix multiplication instructions.
  int bf16 : 1;        // BFloat16 instructions.
  int dgh : 1;         // Data Gathering Hint instruction.
  int rng : 1;         // True random number generator support.
  int bti : 1;         // Branch target identification.
  int mte : 1;         // Memory tagging extension.
  int ecv : 1;         // Enhanced counter virtualization.
  int afp : 1;         // Alternate floating-point behaviour.
  int rpres : 1;       // 12-bit reciprocal (square root) estimate precision.
  int mte3 : 1;        // MTE asymmetric fault handling.
  int sme : 1;         // Scalable Matrix Extension.
  int smei16i64 : 1;   // 16-bit to 64-bit integer widening outer product.
  int smef64f64 : 1;   // FP64 to FP64 outer product.
  int smei8i32 : 1;    // 8-bit to 32-bit integer widening outer product.
  int smef16f32 : 1;   // FP16 to FP32 outer product.
  int smeb16f32 : 1;   // BFloat16 to FP32 outper product.
  int smef32f32 : 1;   // FP32 to FP32 outer product.
  int smefa64 : 1;     // Full A64 support for SME in streaming mode.
  int wfxt : 1;        // WFE and WFI with timeout.
  int ebf16 : 1;       // Extended BFloat16 instructions.
  int sveebf16 : 1;    // SVE BFloat16 instructions.
  int cssc : 1;        // Common short sequence compression instructions.
  int rprfm : 1;       // Range Prefetch Memory hint instruction.
  int sve2p1 : 1;      // Scalable Vector Extension (version 2.1).
  int sme2 : 1;        // Scalable Matrix Extension (version 2).
  int sme2p1 : 1;      // Scalable Matrix Extension (version 2.1).
  int smei16i32 : 1;   // 16-bit to 64-bit integer widening outer product.
  int smebi32i32 : 1;  // 1-bit binary to 32-bit integer outer product.
  int smeb16b16 : 1;   // SME2.1 BFloat16 instructions.
  int smef16f16 : 1;   // FP16 to FP16 outer product.

  // Make sure to update Aarch64FeaturesEnum below if you add a field here.
} Aarch64Features;

typedef struct {
  Aarch64Features features;
  int implementer;  // We set 0 for Windows.
  int variant;      // We set 0 for Windows.
  int part;         // We set 0 for Windows.
  int revision;     // We use GetNativeSystemInfo to get processor revision for
                    // Windows.
} Aarch64Info;

Aarch64Info GetAarch64Info(void);

////////////////////////////////////////////////////////////////////////////////
// Introspection functions

typedef enum {
  AARCH64_FP,
  AARCH64_ASIMD,
  AARCH64_EVTSTRM,
  AARCH64_AES,
  AARCH64_PMULL,
  AARCH64_SHA1,
  AARCH64_SHA2,
  AARCH64_CRC32,
  AARCH64_ATOMICS,
  AARCH64_FPHP,
  AARCH64_ASIMDHP,
  AARCH64_CPUID,
  AARCH64_ASIMDRDM,
  AARCH64_JSCVT,
  AARCH64_FCMA,
  AARCH64_LRCPC,
  AARCH64_DCPOP,
  AARCH64_SHA3,
  AARCH64_SM3,
  AARCH64_SM4,
  AARCH64_ASIMDDP,
  AARCH64_SHA512,
  AARCH64_SVE,
  AARCH64_ASIMDFHM,
  AARCH64_DIT,
  AARCH64_USCAT,
  AARCH64_ILRCPC,
  AARCH64_FLAGM,
  AARCH64_SSBS,
  AARCH64_SB,
  AARCH64_PACA,
  AARCH64_PACG,
  AARCH64_DCPODP,
  AARCH64_SVE2,
  AARCH64_SVEAES,
  AARCH64_SVEPMULL,
  AARCH64_SVEBITPERM,
  AARCH64_SVESHA3,
  AARCH64_SVESM4,
  AARCH64_FLAGM2,
  AARCH64_FRINT,
  AARCH64_SVEI8MM,
  AARCH64_SVEF32MM,
  AARCH64_SVEF64MM,
  AARCH64_SVEBF16,
  AARCH64_I8MM,
  AARCH64_BF16,
  AARCH64_DGH,
  AARCH64_RNG,
  AARCH64_BTI,
  AARCH64_MTE,
  AARCH64_ECV,
  AARCH64_AFP,
  AARCH64_RPRES,
  AARCH64_MTE3,
  AARCH64_SME,
  AARCH64_SME_I16I64,
  AARCH64_SME_F64F64,
  AARCH64_SME_I8I32,
  AARCH64_SME_F16F32,
  AARCH64_SME_B16F32,
  AARCH64_SME_F32F32,
  AARCH64_SME_FA64,
  AARCH64_WFXT,
  AARCH64_EBF16,
  AARCH64_SVE_EBF16,
  AARCH64_CSSC,
  AARCH64_RPRFM,
  AARCH64_SVE2P1,
  AARCH64_SME2,
  AARCH64_SME2P1,
  AARCH64_SME_I16I32,
  AARCH64_SME_BI32I32,
  AARCH64_SME_B16B16,
  AARCH64_SME_F16F16,
  AARCH64_LAST_,
} Aarch64FeaturesEnum;

int GetAarch64FeaturesEnumValue(const Aarch64Features* features,
                                Aarch64FeaturesEnum value);

const char* GetAarch64FeaturesEnumName(Aarch64FeaturesEnum);

CPU_FEATURES_END_CPP_NAMESPACE

#if !defined(CPU_FEATURES_ARCH_AARCH64)
#error "Including cpuinfo_aarch64.h from a non-aarch64 target."
#endif

#endif  // CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_