From 6e8dec575993a680931f87c978f087ddf6286be9 Mon Sep 17 00:00:00 2001 From: David Garske Date: Tue, 16 Jun 2026 09:43:11 -0700 Subject: [PATCH 1/3] Add STM32 bare-metal Hash/SAES/PKA/RNG support --- .wolfssl_known_macro_extras | 52 +- wolfcrypt/src/aes.c | 274 ++--- wolfcrypt/src/ecc.c | 76 +- wolfcrypt/src/port/st/README.md | 164 ++- wolfcrypt/src/port/st/stm32.c | 1795 ++++++++++++++++++++++++++++- wolfcrypt/src/random.c | 235 +++- wolfssl/wolfcrypt/aes.h | 6 +- wolfssl/wolfcrypt/port/st/stm32.h | 571 ++++++++- wolfssl/wolfcrypt/settings.h | 89 +- 9 files changed, 2914 insertions(+), 348 deletions(-) diff --git a/.wolfssl_known_macro_extras b/.wolfssl_known_macro_extras index 840bc74f7d1..917a9958d0a 100644 --- a/.wolfssl_known_macro_extras +++ b/.wolfssl_known_macro_extras @@ -1,4 +1,10 @@ +AES +AES1 +AES_CR_CCFC AES_GCM_GMULT_NCT +AES_ICR_CCF +AES_ISR_CCF +AES_SR_CCF AFX_RESOURCE_DLL AFX_TARG_ENU ALLOW_BINARY_MISMATCH_INTROSPECTION @@ -271,7 +277,11 @@ HARDWARE_CACHE_COHERENCY HASH_AlgoMode_HASH HASH_AlgoMode_HMAC HASH_BYTE_SWAP +HASH_CR_ALGO_1 +HASH_CR_DATATYPE_0 +HASH_CR_DATATYPE_1 HASH_CR_LKEY +HASH_CR_MODE HASH_DIGEST HASH_DataType_8b HASH_IMR_DCIE @@ -496,7 +506,11 @@ OTHER_BOARD O_CLOEXEC PEER_INFO PERF_FLAG_FD_CLOEXEC +PKA_CLRFR_OPERRFC +PKA_CR_OPERRIE PKA_ECC_SCALAR_MUL_IN_B_COEFF +PKA_SR_INITOK +PKA_SR_OPERRF PLATFORMIO PLUTON_CRYPTO_ECC PRINT_SESSION_STATS @@ -504,6 +518,25 @@ PTHREAD_STACK_MIN QAT_ENABLE_HASH QAT_ENABLE_RNG QAT_USE_POLLING_CHECK +RCC_AHB1ENR_PKAEN +RCC_AHB2ENR1_AESEN +RCC_AHB2ENR1_HASHEN +RCC_AHB2ENR1_PKAEN +RCC_AHB2ENR1_SAESEN +RCC_AHB2ENR_AESEN +RCC_AHB2ENR_HASHEN +RCC_AHB2ENR_PKAEN +RCC_AHB2ENR_SAESEN +RCC_AHB2RSTR_PKARST +RCC_AHB3ENR_AESEN +RCC_AHB3ENR_CRYPEN +RCC_AHB3ENR_HASHEN +RCC_AHB3ENR_PKAEN +RCC_AHB3ENR_RNGEN +RCC_AHB3ENR_SAESEN +RCC_MP_AHB5ENSETR_CRYP1EN +RCC_MP_AHB5ENSETR_HASH1EN +RCC_MP_AHB5ENSETR_RNG1EN RC_NO_RNG REDIRECTION_IN3_KEYELMID REDIRECTION_IN3_KEYID @@ -514,10 +547,18 @@ REDIRECTION_OUT2_KEYID RENESAS_T4_USE 
RHEL_MAJOR RHEL_RELEASE_CODE +RNG_CAND_NIST_CR_VALUE +RNG_CAND_NIST_HTCR_VALUE +RNG_CAND_NIST_NSCR_VALUE +RNG_CR_CONDRST +RNG_SR_BUSY RTC_ALARMSUBSECONDMASK_ALL RTE_CMSIS_RTOS_RTX RTOS_MODULE_NET_AVAIL RTPLATFORM +SAES +SAES_CR_EN +SAES_S SAL_IOMMU_CODE SA_INTERRUPT SCEKEY_INSTALLED @@ -580,6 +621,7 @@ STM32WB55xx STM32WBA52xx STM32WL55xx STM32_AESGCM_PARTIAL +STM32_AES_CLEAR_INST STM32_HW_CLOCK_AUTO STM32_NUTTX_RNG STSAFE_HOST_KEY_CIPHER @@ -687,6 +729,11 @@ WC_SLHDSA_KERNEL_ASM WC_SLHDSA_NO_ASM WC_SLHDSA_VERBOSE_DEBUG WC_SSIZE_TYPE +WC_STM32_PKA_DIAG +WC_STM32_RNG_CED_DISABLE +WC_STM32_RNG_DIAG +WC_STM32_RNG_NO_NIST_INIT +WC_STM32_SAES_DIAG WC_STRICT_SIG WC_USE_PIE_FENCEPOSTS_FOR_FIPS WC_WANT_FLAG_DONT_USE_VECTOR_OPS @@ -932,9 +979,10 @@ WOLFSSL_SP_ARM32_UDIV WOLFSSL_SP_FAST_NCT_EXPTMOD WOLFSSL_SP_INT_SQR_VOLATILE WOLFSSL_STACK_CHECK +WOLFSSL_STM32C5 +WOLFSSL_STM32F3 WOLFSSL_STM32F427_RNG -WOLFSSL_STM32U5_DHUK -WOLFSSL_STM32_RNG_NOLIB +WOLFSSL_STM32U0 WOLFSSL_STRONGEST_HASH_SIG WOLFSSL_STSAFE_TAKES_SLOT WOLFSSL_TELIT_M2MB diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index 6806acbc965..155934c510e 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -233,6 +233,11 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits static WARN_UNUSED_RESULT int wc_AesEncrypt( Aes* aes, const byte* inBlock, byte* outBlock) { + #ifdef WOLFSSL_STM32_BARE + /* Bare-metal driver handles mutex, clock and key/IV internally. + * DHUK is routed via the crypto-callback framework, not here. 
*/ + return wc_Stm32_Aes_Ecb(aes, outBlock, inBlock, WC_AES_BLOCK_SIZE, 1); + #else int ret = 0; #ifdef WOLFSSL_STM32_CUBEMX CRYP_HandleTypeDef hcryp; @@ -247,50 +252,7 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits return ret; #endif - #ifdef WOLFSSL_STM32U5_DHUK - ret = wolfSSL_CryptHwMutexLock(); - if (ret != 0) - return ret; - - /* Handle making use of wrapped key */ - if (aes->devId == WOLFSSL_STM32U5_DHUK_WRAPPED_DEVID) { - CRYP_ConfigTypeDef Config = {0}; - - ret = wc_Stm32_Aes_UnWrap(aes, &hcryp, (const byte*)aes->key, - aes->keylen, aes->dhukIV, aes->dhukIVLen); - if (ret != HAL_OK) { - WOLFSSL_MSG("Error with DHUK key unwrap"); - ret = BAD_FUNC_ARG; - } - /* reconfigure for using unwrapped key now */ - HAL_CRYP_GetConfig(&hcryp, &Config); - Config.KeyMode = CRYP_KEYMODE_NORMAL; - Config.KeySelect = CRYP_KEYSEL_NORMAL; - Config.Algorithm = CRYP_AES_ECB; - Config.DataType = CRYP_DATATYPE_8B; - Config.DataWidthUnit = CRYP_DATAWIDTHUNIT_BYTE; - HAL_CRYP_SetConfig(&hcryp, &Config); - } - else { - ret = wc_Stm32_Aes_Init(aes, &hcryp, 1); - if (ret == 0) { - hcryp.Init.Algorithm = CRYP_AES_ECB; - ret = HAL_CRYP_Init(&hcryp); - if (ret != HAL_OK) { - ret = BAD_FUNC_ARG; - } - } - } - - if (ret == HAL_OK) { - ret = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)inBlock, WC_AES_BLOCK_SIZE, - (uint32_t*)outBlock, STM32_HAL_TIMEOUT); - if (ret != HAL_OK) { - ret = WC_TIMEOUT_E; - } - } - HAL_CRYP_DeInit(&hcryp); - #elif defined(WOLFSSL_STM32_CUBEMX) + #if defined(WOLFSSL_STM32_CUBEMX) ret = wc_Stm32_Aes_Init(aes, &hcryp, 0); if (ret != 0) return ret; @@ -373,6 +335,7 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits wc_Stm32_Aes_Cleanup(); return ret; + #endif /* !WOLFSSL_STM32_BARE */ } #endif /* WOLFSSL_AES_DIRECT || HAVE_AESGCM || HAVE_AESCCM */ @@ -381,6 +344,10 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits static WARN_UNUSED_RESULT int wc_AesDecrypt( Aes* aes, const 
byte* inBlock, byte* outBlock) { + #ifdef WOLFSSL_STM32_BARE + /* DHUK is routed via the crypto-callback framework, not here. */ + return wc_Stm32_Aes_Ecb(aes, outBlock, inBlock, WC_AES_BLOCK_SIZE, 0); + #else int ret = 0; #ifdef WOLFSSL_STM32_CUBEMX CRYP_HandleTypeDef hcryp; @@ -395,51 +362,7 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits return ret; #endif - #ifdef WOLFSSL_STM32U5_DHUK - ret = wolfSSL_CryptHwMutexLock(); - if (ret != 0) - return ret; - - /* Handle making use of wrapped key */ - if (aes->devId == WOLFSSL_STM32U5_DHUK_WRAPPED_DEVID) { - CRYP_ConfigTypeDef Config; - - XMEMSET(&Config, 0, sizeof(Config)); - ret = wc_Stm32_Aes_UnWrap(aes, &hcryp, (const byte*)aes->key, - aes->keylen, aes->dhukIV, aes->dhukIVLen); - if (ret != HAL_OK) { - WOLFSSL_MSG("Error with DHUK unwrap"); - ret = BAD_FUNC_ARG; - } - /* reconfigure for using unwrapped key now */ - HAL_CRYP_GetConfig(&hcryp, &Config); - Config.KeyMode = CRYP_KEYMODE_NORMAL; - Config.KeySelect = CRYP_KEYSEL_NORMAL; - Config.Algorithm = CRYP_AES_ECB; - Config.DataType = CRYP_DATATYPE_8B; - Config.DataWidthUnit = CRYP_DATAWIDTHUNIT_BYTE; - HAL_CRYP_SetConfig(&hcryp, &Config); - } - else { - ret = wc_Stm32_Aes_Init(aes, &hcryp, 1); - if (ret == 0) { - hcryp.Init.Algorithm = CRYP_AES_ECB; - ret = HAL_CRYP_Init(&hcryp); - if (ret != HAL_OK) { - ret = BAD_FUNC_ARG; - } - } - } - - if (ret == HAL_OK) { - ret = HAL_CRYP_Decrypt(&hcryp, (uint32_t*)inBlock, WC_AES_BLOCK_SIZE, - (uint32_t*)outBlock, STM32_HAL_TIMEOUT); - if (ret != HAL_OK) { - ret = WC_TIMEOUT_E; - } - } - HAL_CRYP_DeInit(&hcryp); - #elif defined(WOLFSSL_STM32_CUBEMX) + #if defined(WOLFSSL_STM32_CUBEMX) ret = wc_Stm32_Aes_Init(aes, &hcryp, 0); if (ret != 0) return ret; @@ -527,6 +450,7 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits wc_Stm32_Aes_Cleanup(); return ret; + #endif /* !WOLFSSL_STM32_BARE */ } #endif /* WOLFSSL_AES_DIRECT */ #endif /* HAVE_AES_DECRYPT */ @@ 
-5785,141 +5709,39 @@ int wc_AesSetIV(Aes* aes, const byte* iv) #ifdef HAVE_AES_CBC #if defined(STM32_CRYPTO) -#ifdef WOLFSSL_STM32U5_DHUK +#ifdef WOLFSSL_STM32_BARE int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) { - int ret = 0; - CRYP_HandleTypeDef hcryp; - word32 blocks = (sz / WC_AES_BLOCK_SIZE); - -#ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS + #ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS if (sz % WC_AES_BLOCK_SIZE) { return BAD_LENGTH_E; } -#endif - if (blocks == 0) + #endif + if (sz == 0) { return 0; - - ret = wolfSSL_CryptHwMutexLock(); - if (ret != 0) { - return ret; - } - - if (aes->devId == WOLFSSL_STM32U5_DHUK_WRAPPED_DEVID) { - CRYP_ConfigTypeDef Config; - - XMEMSET(&Config, 0, sizeof(Config)); - ret = wc_Stm32_Aes_UnWrap(aes, &hcryp, (const byte*)aes->key, aes->keylen, - (const byte*)aes->dhukIV, aes->dhukIVLen); - - /* reconfigure for using unwrapped key now */ - HAL_CRYP_GetConfig(&hcryp, &Config); - Config.KeyMode = CRYP_KEYMODE_NORMAL; - Config.KeySelect = CRYP_KEYSEL_NORMAL; - Config.Algorithm = CRYP_AES_CBC; - ByteReverseWords(aes->reg, aes->reg, WC_AES_BLOCK_SIZE); - Config.pInitVect = (STM_CRYPT_TYPE*)aes->reg; - HAL_CRYP_SetConfig(&hcryp, &Config); - } - else { - ret = wc_Stm32_Aes_Init(aes, &hcryp, 1); - if (ret != 0) { - wolfSSL_CryptHwMutexUnLock(); - return ret; - } - hcryp.Init.Algorithm = CRYP_AES_CBC; - ByteReverseWords(aes->reg, aes->reg, WC_AES_BLOCK_SIZE); - hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg; - ret = HAL_CRYP_Init(&hcryp); - } - - if (ret == HAL_OK) { - ret = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)in, blocks * WC_AES_BLOCK_SIZE, - (uint32_t*)out, STM32_HAL_TIMEOUT); - if (ret != HAL_OK) { - ret = WC_TIMEOUT_E; - } - - /* store iv for next call */ - XMEMCPY(aes->reg, out + sz - WC_AES_BLOCK_SIZE, WC_AES_BLOCK_SIZE); } - - HAL_CRYP_DeInit(&hcryp); - - wolfSSL_CryptHwMutexUnLock(); - wc_Stm32_Aes_Cleanup(); - - return ret; + /* DHUK is routed via the crypto-callback framework, not here. 
+ * wc_Stm32_Aes_Cbc processes whole blocks and ignores any sub-block + * remainder, matching the SW / CUBEMX CBC backends; define + * WOLFSSL_AES_CBC_LENGTH_CHECKS (above) to reject a non-block-multiple + * length with BAD_LENGTH_E instead. */ + return wc_Stm32_Aes_Cbc(aes, out, in, sz, 1); } #ifdef HAVE_AES_DECRYPT int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) { - int ret = 0; - CRYP_HandleTypeDef hcryp; - word32 blocks = (sz / WC_AES_BLOCK_SIZE); - -#ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS + #ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS if (sz % WC_AES_BLOCK_SIZE) { return BAD_LENGTH_E; } -#endif - if (blocks == 0) + #endif + if (sz == 0) { return 0; - - ret = wolfSSL_CryptHwMutexLock(); - if (ret != 0) { - return ret; - } - - if (aes->devId == WOLFSSL_STM32U5_DHUK_WRAPPED_DEVID) { - CRYP_ConfigTypeDef Config; - - XMEMSET(&Config, 0, sizeof(Config)); - ret = wc_Stm32_Aes_UnWrap(aes, &hcryp, (const byte*)aes->key, aes->keylen, - aes->dhukIV, aes->dhukIVLen); - - /* reconfigure for using unwrapped key now */ - HAL_CRYP_GetConfig(&hcryp, &Config); - Config.KeyMode = CRYP_KEYMODE_NORMAL; - Config.KeySelect = CRYP_KEYSEL_NORMAL; - Config.Algorithm = CRYP_AES_CBC; - ByteReverseWords(aes->reg, aes->reg, WC_AES_BLOCK_SIZE); - Config.pInitVect = (STM_CRYPT_TYPE*)aes->reg; - HAL_CRYP_SetConfig(&hcryp, &Config); - } - else { - ret = wc_Stm32_Aes_Init(aes, &hcryp, 1); - if (ret != 0) { - wolfSSL_CryptHwMutexUnLock(); - return ret; - } - hcryp.Init.Algorithm = CRYP_AES_CBC; - ByteReverseWords(aes->reg, aes->reg, WC_AES_BLOCK_SIZE); - hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg; - ret = HAL_CRYP_Init(&hcryp); - } - - if (ret == HAL_OK) { - /* if input and output same will overwrite input iv */ - XMEMCPY(aes->tmp, in + sz - WC_AES_BLOCK_SIZE, WC_AES_BLOCK_SIZE); - ret = HAL_CRYP_Decrypt(&hcryp, (uint32_t*)in, blocks * WC_AES_BLOCK_SIZE, - (uint32_t*)out, STM32_HAL_TIMEOUT); - if (ret != HAL_OK) { - ret = WC_TIMEOUT_E; - } - - /* store iv for next call */ - 
XMEMCPY(aes->reg, aes->tmp, WC_AES_BLOCK_SIZE); } - - HAL_CRYP_DeInit(&hcryp); - wolfSSL_CryptHwMutexUnLock(); - wc_Stm32_Aes_Cleanup(); - - return ret; + /* DHUK is routed via the crypto-callback framework, not here. */ + return wc_Stm32_Aes_Cbc(aes, out, in, sz, 0); } #endif /* HAVE_AES_DECRYPT */ - #elif defined(WOLFSSL_STM32_CUBEMX) int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) { @@ -7176,6 +6998,27 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) int wc_AesCtrEncryptBlock(Aes* aes, byte* out, const byte* in) { + #ifdef WOLFSSL_STM32_BARE + /* CTR per-block transform: produce out = in XOR AES_ECB(counter). + * ECB-encrypt the counter aes->reg into a keystream block, then XOR + * with the plaintext 'in'. The caller (XTRANSFORM_AESCTRBLOCK loop) + * does not XOR and increments aes->reg after this returns. */ + byte ks[WC_AES_BLOCK_SIZE]; + int ret; + ret = wc_Stm32_Aes_Ecb(aes, ks, (const byte*)aes->reg, + WC_AES_BLOCK_SIZE, 1); + if (ret == 0) { + xorbufout(out, in, ks, WC_AES_BLOCK_SIZE); + } + else { + /* The XTRANSFORM_AESCTRBLOCK macro discards this return; zero + * the block so a failed HW ECB does not leave stale/prior + * plaintext in the output. 
*/ + ForceZero(out, WC_AES_BLOCK_SIZE); + } + ForceZero(ks, sizeof(ks)); + return ret; + #else int ret = 0; #ifdef WOLFSSL_STM32_CUBEMX CRYP_HandleTypeDef hcryp; @@ -7286,6 +7129,7 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) wolfSSL_CryptHwMutexUnLock(); wc_Stm32_Aes_Cleanup(); return ret; + #endif /* !WOLFSSL_STM32_BARE */ } @@ -10466,6 +10310,7 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, authTag, authTagSz, authIn, authInSz); #endif + #if defined(WOLFSSL_MICROCHIP_TA100) && defined(WOLFSSL_MICROCHIP_AESGCM) #ifndef TA_AES_GCM_MAX_DATA_SIZE #define TA_AES_GCM_MAX_DATA_SIZE 996u @@ -10483,6 +10328,17 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, authIn, authInSz); } #endif + +#if defined(WOLFSSL_STM32_BARE) && defined(STM32_CRYPTO) + ret = wc_Stm32_Aes_Gcm(aes, out, in, sz, iv, ivSz, + authTag, authTagSz, + authIn, authInSz, 1 /* enc */); + if (ret != WC_NO_ERR_TRACE(CRYPTOCB_UNAVAILABLE)) + return ret; + /* fall through to SW GCM (still uses HW AES via wc_AesEncrypt) */ +#endif /* WOLFSSL_STM32_BARE && STM32_CRYPTO */ + + #ifdef STM32_CRYPTO_AES_GCM return wc_AesGcmEncrypt_STM32( aes, out, in, sz, iv, ivSz, @@ -11231,6 +11087,10 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, } #endif + /* BARE: GCM decrypt always uses SW path (with HW AES blocks via + * wc_AesEncrypt). Encrypt is HW-accelerated above; decrypt + tag + * verification stays in well-tested SW for now. 
*/ + #ifdef STM32_CRYPTO_AES_GCM /* The STM standard peripheral library API's doesn't support partial blocks */ return wc_AesGcmDecrypt_STM32( @@ -14129,7 +13989,7 @@ int wc_AesInit(Aes* aes, void* heap, int devId) aes->heap = heap; -#if defined(WOLF_CRYPTO_CB) || defined(WOLFSSL_STM32U5_DHUK) +#if defined(WOLF_CRYPTO_CB) aes->devId = devId; aes->devCtx = NULL; #else diff --git a/wolfcrypt/src/ecc.c b/wolfcrypt/src/ecc.c index a3476d4e146..e0d77511eca 100644 --- a/wolfcrypt/src/ecc.c +++ b/wolfcrypt/src/ecc.c @@ -279,8 +279,9 @@ ECC Curve Sizes: !defined(WOLFSSL_MICROCHIP_TA100) && \ !defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_SILABS_SE_ACCEL) && \ !defined(WOLFSSL_KCAPI_ECC) && !defined(WOLFSSL_SE050) && \ - !defined(WOLFSSL_STM32_PKA) && !defined(WOLFSSL_PSOC6_CRYPTO) && \ - !defined(WOLFSSL_XILINX_CRYPT_VERSAL) + !defined(WOLFSSL_XILINX_CRYPT_VERSAL) && \ + !defined(WOLFSSL_STM32_PKA) && \ + !defined(WOLFSSL_PSOC6_CRYPTO) #undef HAVE_ECC_VERIFY_HELPER #define HAVE_ECC_VERIFY_HELPER #endif @@ -2731,7 +2732,8 @@ int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* a, return _ecc_projective_dbl_point(P, R, a, modulus, mp); } -#if !defined(FREESCALE_LTC_ECC) && !defined(WOLFSSL_STM32_PKA) && \ +#if !defined(FREESCALE_LTC_ECC) && \ + (!defined(WOLFSSL_STM32_PKA) || defined(WC_STM32_PKA_VERIFY_ONLY)) && \ !defined(WOLFSSL_CRYPTOCELL) @@ -2991,7 +2993,8 @@ int ecc_map(ecc_point* P, mp_int* modulus, mp_digit mp) } #endif /* !WOLFSSL_SP_MATH || WOLFSSL_PUBLIC_ECC_ADD_DBL */ -#if !defined(FREESCALE_LTC_ECC) && !defined(WOLFSSL_STM32_PKA) && \ +#if !defined(FREESCALE_LTC_ECC) && \ + (!defined(WOLFSSL_STM32_PKA) || defined(WC_STM32_PKA_VERIFY_ONLY)) && \ !defined(WOLFSSL_CRYPTOCELL) #if !defined(WOLFSSL_SP_MATH) @@ -7020,12 +7023,49 @@ static int deterministic_sign_helper(const byte* in, word32 inlen, ecc_key* key) #endif /* WOLFSSL_ECDSA_DETERMINISTIC_K || WOLFSSL_ECDSA_DETERMINISTIC_K_VARIANT */ -#if defined(WOLFSSL_STM32_PKA) +/* WOLFSSL_STM32_PKA routes 
HW ECDSA sign/verify through the STM32 PKA + * (HAL_PKA_ECDSASign / Verify). Works under both the CubeMX-HAL path + * and the bare-metal direct-register path (WOLFSSL_STM32_BARE) -- the + * bare-metal driver implements the same HAL_PKA_ECDSA* surface. + * + * The non-FIPS input-validation checks (length range, all-zero digest + * rejection) live inside the SW body of wc_ecc_sign_hash_ex below. + * Since the STM32_PKA branch returns early without reaching them, + * mirror those checks here so HW + SW paths share the same input + * contract. Without this, an all-zero digest reaches the PKA IP and + * succeeds at the HW layer -- the wolfcrypt_test ECC sweep then fails + * at the post-call assertion that expected ECC_BAD_ARG_E for a zero + * digest. */ +/* The STM32H563 "light" PKA can verify but not sign (per ST: H563 + * verify-only, H573 full). WC_STM32_PKA_VERIFY_ONLY routes sign to the + * software path (the #elif branch below) while verify stays on the HW PKA. */ +#if defined(WOLFSSL_STM32_PKA) && !defined(WC_STM32_PKA_VERIFY_ONLY) int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng, ecc_key* key, mp_int *r, mp_int *s) { +#ifndef WC_ALLOW_ECC_ZERO_HASH + byte hashIsZero = 0; + word32 zIdx; +#endif + + if (in == NULL || r == NULL || s == NULL || key == NULL || rng == NULL) { + return ECC_BAD_ARG_E; + } + if ((inlen > WC_MAX_DIGEST_SIZE) || (inlen < WC_MIN_DIGEST_SIZE)) { + return BAD_LENGTH_E; + } +#ifndef WC_ALLOW_ECC_ZERO_HASH + /* reject all 0's hash */ + for (zIdx = 0; zIdx < inlen; zIdx++) + hashIsZero |= in[zIdx]; + if (hashIsZero == 0) + return ECC_BAD_ARG_E; +#endif + return stm32_ecc_sign_hash_ex(in, inlen, rng, key, r, s); } + + #elif !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_ATECC608A) && \ !defined(WOLFSSL_MICROCHIP_TA100) && \ !defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_KCAPI_ECC) @@ -8852,7 +8892,8 @@ int wc_ecc_verify_hash(const byte* sig, word32 siglen, const byte* hash, #ifndef WOLF_CRYPTO_CB_ONLY_ECC -#if 
!defined(WOLFSSL_STM32_PKA) && !defined(WOLFSSL_PSOC6_CRYPTO) && \ +#if !defined(WOLFSSL_STM32_PKA) && \ + !defined(WOLFSSL_PSOC6_CRYPTO) && \ !defined(WOLF_CRYPTO_CB_ONLY_ECC) static int wc_ecc_check_r_s_range(ecc_key* key, mp_int* r, mp_int* s) { @@ -9370,6 +9411,29 @@ int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash, word32 hashlen, int* res, ecc_key* key) { #if defined(WOLFSSL_STM32_PKA) + /* HW ECDSA verify via STM32 PKA. Works under both the CubeMX-HAL + * and the bare-metal direct-register paths. Mirror the non-FIPS + * input-validation from the SW body below (length range, all-zero + * digest rejection) so HW + SW share the same input contract. */ +#ifndef WC_ALLOW_ECC_ZERO_HASH + byte hashIsZero = 0; + word32 zIdx; +#endif + + if (r == NULL || s == NULL || hash == NULL || res == NULL || key == NULL) { + return ECC_BAD_ARG_E; + } + if ((hashlen > WC_MAX_DIGEST_SIZE) || (hashlen < WC_MIN_DIGEST_SIZE)) { + return BAD_LENGTH_E; + } +#ifndef WC_ALLOW_ECC_ZERO_HASH + /* reject all 0's hash */ + for (zIdx = 0; zIdx < hashlen; zIdx++) + hashIsZero |= hash[zIdx]; + if (hashIsZero == 0) + return ECC_BAD_ARG_E; +#endif + return stm32_ecc_verify_hash_ex(r, s, hash, hashlen, res, key); #elif defined(WOLFSSL_PSOC6_CRYPTO) return psoc6_ecc_verify_hash_ex(r, s, hash, hashlen, res, key); diff --git a/wolfcrypt/src/port/st/README.md b/wolfcrypt/src/port/st/README.md index 0bcd0244bdf..74a93ced00e 100644 --- a/wolfcrypt/src/port/st/README.md +++ b/wolfcrypt/src/port/st/README.md @@ -1,41 +1,57 @@ # ST Ports -Support for the STM32 L4, F1, F2, F4, F7 and MP13 on-board crypto hardware -acceleration: - - symmetric AES (ECB/CBC/CTR/GCM) - - MD5/SHA1/SHA224/SHA256 (MP13 does not have MD5 acceleration) - -Support for the STM32 PKA on WB55, H7, MP13 and other devices with on-board -public-key acceleration: - - ECC192/ECC224/ECC256/ECC384 - -Support for the STSAFE-A secure element family via I2C for ECC supporting NIST P-256/P-384 and Brainpool 256/384-bit curves: - - 
**STSAFE-A100/A110**: Uses ST's proprietary STSAFE-A1xx middleware. Contact us at support@wolfssl.com for integration assistance. - - **STSAFE-A120**: Uses ST's open-source [STSELib](https://github.com/STMicroelectronics/STSELib) (BSD-3 license). - +Support for STM32 on-chip crypto hardware acceleration across the following families: + +| Family flag | Chips / typical NUCLEO board | +|-----------------------|--------------------------------------------------------------------| +| `WOLFSSL_STM32F1` | F1xx | +| `WOLFSSL_STM32F2` | F2xx (RNG only on F207) | +| `WOLFSSL_STM32F4` | F4xx (CRYP / HASH / RNG) | +| `WOLFSSL_STM32F7` | F7xx (CRYP / HASH / RNG) | +| `WOLFSSL_STM32G0` | G0xx (TinyAES / RNG on crypto G0Bx/G0Cx; RNG-only on others) | +| `WOLFSSL_STM32G4` | G4xx (TinyAES / RNG / V1 PKA) | +| `WOLFSSL_STM32H5` | H5xx (HASH / RNG / SAES / V2 PKA / DHUK on H573) | +| `WOLFSSL_STM32H7` | H7xx classic (CRYP / HASH / RNG); H7Ax/H7Bx + H72x are RNG-only | +| `WOLFSSL_STM32H7S` | H7Sx (SAES / HASH / RNG / V2 PKA) | +| `WOLFSSL_STM32L4` | L4xx (TinyAES variants / HASH / RNG / V1 PKA on L4-rev) | +| `WOLFSSL_STM32L5` | L5xx (HASH / RNG / V1 PKA; TinyAES + SAES on L562) | +| `WOLFSSL_STM32U0` | U0xx (TinyAES / RNG only) | +| `WOLFSSL_STM32U3` | U3xx (TinyAES / HASH / RNG / SAES / V2 PKA / DHUK) | +| `WOLFSSL_STM32U5` | U5xx (TinyAES / HASH / RNG / SAES / V2 PKA / DHUK) | +| `WOLFSSL_STM32WB` | WB55 (TinyAES / RNG / V1 PKA) | +| `WOLFSSL_STM32WBA` | WBA52 (TinyAES / HASH / RNG / SAES / V2 PKA / DHUK) | +| `WOLFSSL_STM32WL` | WL55 (TinyAES / RNG / V1 PKA) | +| `WOLFSSL_STM32C0` | C0xx (not yet supported in settings.h; SW only) | +| `WOLFSSL_STM32C5` | C5xx (TinyAES / HASH / RNG / SAES / V2 PKA / DHUK) | +| `WOLFSSL_STM32N6` | N6xx (TinyAES / HASH / RNG / SAES / V2 PKA / DHUK; M55 core) | +| `WOLFSSL_STM32MP13` | MP13 (CRYP / HASH / RNG / PKA; Cortex-A7) | +| `WOLFSSL_STM32MP25` | MP25 (not yet supported in settings.h; Cortex-A35 + M33) | + +The port supports three integration 
flavors: + +- **CubeMX HAL** (`WOLFSSL_STM32_CUBEMX`) -- recommended for most projects. Pairs with ST's CubeMX-generated HAL drivers. This is the legacy default and what STM32 forum tutorials describe. +- **Standard Peripheral Library** (no `WOLFSSL_STM32_CUBEMX`) -- legacy StdPeriLib path, kept for older F1/F2/F4 projects that have not migrated. +- **BARE-metal** (`WOLFSSL_STM32_BARE`) -- direct-register access with zero HAL or StdPeriLib dependency. Designed for wolfBoot / no-OS / FreeRTOS / TrustZone-NS workloads where pulling in the HAL is undesirable. See [wolfssl-examples-stm32/STM32_Bare_Test](https://github.com/wolfSSL/wolfssl-examples-stm32) for a 27-board reference matrix. + +Support for the STSAFE-A secure element family via I2C is documented separately below. For details see our [wolfSSL ST](https://www.wolfssl.com/docs/stm32/) page. ## STM32 Symmetric Acceleration -We support using the STM32 CubeMX and Standard Peripheral Library. +The CRYP IP block (full-size, older families F2/F4/F7/H7-classic/MP13) and the TinyAES IP block (smaller, newer families L4/L5/G4/U0/U3/U5/WB/WBA/WL/H5/C5/N6) are both driven through the same wolfcrypt entry points in `wolfcrypt/src/port/st/stm32.c`. The HASH IP block has its own driver and is independent of the AES block. -### Building +### Enabling -To enable support define one of the following: +Define the appropriate family flag from the table above, plus a build-flavor flag: ``` -#define WOLFSSL_STM32L4 -#define WOLFSSL_STM32F1 -#define WOLFSSL_STM32F2 -#define WOLFSSL_STM32F4 -#define WOLFSSL_STM32F7 +#define WOLFSSL_STM32U5 /* family */ +#define WOLFSSL_STM32_CUBEMX /* or WOLFSSL_STM32_BARE */ ``` -To use CubeMX define `WOLFSSL_STM32_CUBEMX` otherwise StdPeriLib is used. 
- -To disable portions of the hardware acceleration you can optionally define: +You can selectively disable parts of the HW acceleration: ``` #define NO_STM32_RNG @@ -44,29 +60,111 @@ To disable portions of the hardware acceleration you can optionally define: #define NO_STM32_HMAC ``` -### Coding +If your chip simply does not have an IP block (e.g. H7Ax has no CRYP/HASH; F207 has no CRYP/HASH) the family arm sets the appropriate `NO_STM32_*` defines for you. + +### SAES instance routing -In your application you must include <wolfssl/wolfcrypt/settings.h> before any other wolfSSL headers. If building the sources directly we recommend defining `WOLFSSL_USER_SETTINGS` and adding your own `user_settings.h` file. You can find a good reference for this in `IDE/GCC-ARM/Header/user_settings.h`. +Some newer families (H5/H7S/U3/U5/WBA/C5/N6, plus the L562 sub-variant) expose a Secure AES (SAES) instance in addition to (or instead of) a regular AES block. Define `WOLFSSL_STM32_USE_SAES` to route all wolfcrypt AES traffic through SAES via the `WC_STM32_AES_INST` indirection macro. This is required when the regular AES block is TrustZone-gated (H7S3) and is also a prerequisite for DHUK key-wrap on the families in the `WC_STM32_HAS_DHUK` gate (U3/U5/H5/WBA/C5). +### Coding + +Include `<wolfssl/wolfcrypt/settings.h>` before any other wolfSSL headers. If building the sources directly we recommend defining `WOLFSSL_USER_SETTINGS` and adding your own `user_settings.h`. A reference is in `IDE/GCC-ARM/Header/user_settings.h`. ### Benchmarks -See our [benchmarks](https://www.wolfssl.com/docs/benchmarks/) on the wolfSSL website. +See our [benchmarks](https://www.wolfssl.com/docs/benchmarks/) page for canonical numbers. For per-silicon BARE-vs-CubeMX comparisons across the current 27-board NUCLEO matrix, see the bench tables in [wolfssl-examples-stm32/STM32_Bare_Test/README.md](https://github.com/wolfSSL/wolfssl-examples-stm32). ## STM32 PKA (Public Key Acceleration) -STM32 PKA is present in STM32WB55 as well as STM32H7 series. 
+The STM32 PKA peripheral accelerates ECC scalar multiplication and ECDSA sign/verify. Two distinct IP revisions are in the wild: -### Building +- **V1 PKA** (WB55, WL55, L5, some L4 rev) -- single-curve-at-a-time, limited curve set, slower. Driven via the legacy `WOLFSSL_STM32_PKA` path. +- **V2 PKA** (U3, U5, WBA, H5, H7S, N6, L562, C5) -- larger RAM, more curves, more concurrent operations. Enabled by defining `WOLFSSL_STM32_PKA_V2` on top of `WOLFSSL_STM32_PKA`. + +### Enabling + +``` +#define WOLFSSL_STM32_PKA +#define WOLFSSL_STM32_PKA_V2 /* additionally, for V2 silicon */ +``` -To enable support define the following +### Notes -`WOLFSSL_STM32_PKA` +- On V1 PKA chips the PKA peripheral runs a PKA-RAM clear on first clock-enable and silently rejects `CR.EN` writes during the clear. The wolfcrypt init mirrors HAL's behavior by spinning on `CR.EN` readback up to `WC_STM32_PKA_INIT_TIMEOUT` iterations. This was discovered during L5 bring-up but benefits every PKA chip. +- On V2 PKA the `coefB` parameter must be loaded explicitly (V1 hardware can derive it from the prime). The V2 ECC scalar-multiplication path in `HAL_PKA_ECCMul()` and the ECDSA sign/verify paths both populate it -- see `wolfcrypt/src/port/st/stm32.c`. +- BARE-metal V2 PKA ECDSA sign/verify is work-in-progress -- the single-curve P-256 path is functional but multi-curve sweeps in `wolfcrypt_test` hit a -248 result on some boards. Track this in the wolfssl-examples-stm32 STM32_Bare_Test/README. -### Using -When the support is enabled, the ECC operations will be accelerated using the PKA crypto co-processor. +## STM32 BARE-metal port + +`WOLFSSL_STM32_BARE` selects a direct-register integration with zero HAL or StdPeriLib dependency. Use this for: + +- wolfBoot / no-OS firmware where the HAL footprint is unwelcome. +- TrustZone non-secure applications where the HAL link surface is too broad. +- FreeRTOS or RTX projects that prefer to provide their own clock-tree and UART init. 
+ +The caller is responsible for: + +1. Clock-tree bring-up (HSI/HSE, PLL, voltage scaling, flash latency). +2. UART / VCP bring-up for stdout. +3. Peripheral clock-enable for the IP blocks you use (RNG, CRYP/SAES, HASH, PKA). + +In return wolfcrypt drives the IP-block registers directly. Family-specific arms in `wolfssl/wolfcrypt/port/st/stm32.h` handle the per-chip register-name differences (e.g. `RCC->AHB2ENR` vs `RCC->AHB2ENR1`, `D2CCIP2R` vs `CDCCIP2R`). + +### Enabling + +``` +#define WOLFSSL_STM32_BARE +#define WOLFSSL_STM32U5 /* or any other family flag */ +#define STM32_RNG /* HW IP enables */ +#define STM32_CRYPTO +#define STM32_HASH +``` + +### Per-family HW IP coverage (BARE-metal validation matrix) + +The following table summarizes which IP blocks the BARE path drives on each family currently in the validation matrix. `-` means the silicon does not carry the IP; the corresponding wolfcrypt algorithm falls back to software. + +| Family | Chip example | AES | HASH | RNG | PKA | SAES | DHUK | +|----------|------------------|------------|------|------|-----|------|------| +| F2 | STM32F207ZG | - | - | HW | - | - | - | +| F3 | STM32F303ZE | - | - | - | - | - | - | +| F4 | STM32F437/F439ZI | CRYP | HW | HW | - | - | - | +| F7 | STM32F767ZI | CRYP | HW | HW | - | - | - | +| G0 | STM32G071RB | - | - | - | - | - | - | +| G4 | STM32G491RE | - | - | HW | - | - | - | +| H5 (no SAES) | STM32H563ZI | - | HW | HW | - | - | - | +| H5 (full)| STM32H573ZI | TinyAES | HW | HW | V2 | HW | HW | +| H7 | STM32H753ZI | CRYP | HW | HW | - | - | - | +| H7 RNG | STM32H723/H7A3 | - | - | HW | - | - | - | +| H7S | STM32H7S3L8 | SAES | HW | HW | V2 | HW | - | +| L4 | STM32L4A6ZG | TinyAES | HW | HW | - | - | - | +| L5 (552) | STM32L552ZE-Q | - | HW | HW | V1 | - | - | +| L5 (562) | STM32L562E-DK | TinyAES | HW | HW | V1 | HW | - | +| N6 | STM32N657X0-Q | TinyAES | HW | HW | V2 | HW | HW | +| U0 | STM32U083RC | TinyAES | - | HW | - | - | - | +| U3 | STM32U385RG-Q | TinyAES | HW | HW 
| V2 | HW | HW | +| U5 | STM32U5xx | TinyAES | HW | HW | V2 | HW | HW | +| WB | STM32WB55RG | TinyAES | - | HW | V1 | - | - | +| WBA | STM32WBA52CG | TinyAES | HW | HW | V2 | HW | HW | +| WL | STM32WL55JC | TinyAES | - | HW | V1 | - | - | +| C0 | STM32C031C6 | - | - | - | - | - | - | +| C5 | STM32C5A3ZG | TinyAES | HW | HW | V2 | HW | HW | + +### Reference example + +See [wolfssl-examples-stm32/STM32_Bare_Test/](https://github.com/wolfSSL/wolfssl-examples-stm32) for a Makefile-based harness covering all of the above families. It exposes three CONFIG flavors (`bare` = HW path, `asm` = WOLFSSL_ARMASM, `c` = software-C baseline) and two TARGETs (`test` = wolfcrypt KAT, `bench` = wolfcrypt benchmark). Per-silicon build-size and benchmark tables live in the example's README. + +### TrustZone-aware silicon recovery + +Some chips ship with `TZEN=1` from the factory (H5, L5, U5). To run a non-secure-only BARE binary on these you must first disable TrustZone via the option bytes: + +``` +STM32_Programmer_CLI -c port=swd sn= mode=UR -ob TZEN=0 +``` + +If a flashed binary commits a bad supply config (e.g. H7Ax `PWR_CR3` first-write-wins lock) and the SWD AP becomes unreachable, recovery is via BOOT0=VDD to reach the ROM bootloader, then mass-erase from there. ## STSAFE-A ECC Acceleration diff --git a/wolfcrypt/src/port/st/stm32.c b/wolfcrypt/src/port/st/stm32.c index 5a94f593bc4..5a7dffc9e15 100644 --- a/wolfcrypt/src/port/st/stm32.c +++ b/wolfcrypt/src/port/st/stm32.c @@ -46,6 +46,17 @@ #ifdef WOLFSSL_STM32_PKA #include +#ifdef WOLFSSL_STM32_BARE +/* Bare-metal: CMSIS device header comes from settings.h; the + * PKA_HandleTypeDef and PKA IO typedefs come from port/st/stm32.h. The + * HAL_PKA_* entry points are implemented further down under the matching + * guard. + * + * BARE debug switches (off by default, opt-in at -D; no code unless set): + * WC_STM32_PKA_DIAG -- printf on PKA init / op timeout / OUT_ERROR. 
+ * WC_STM32_SAES_DIAG -- printf on AES/SAES CCF poll timeout + * (DEBUG_STM32_BARE_GCM is a synonym). */ +#else #if defined(WOLFSSL_STM32L5) #include #include @@ -76,7 +87,107 @@ #else #error Please add the hal_pk.h include #endif +#endif /* !WOLFSSL_STM32_BARE */ + +#if defined(WOLFSSL_STM32_BARE) && defined(WOLFSSL_STM32_PKA) + +#include + +/* Bare-metal stand-ins for the slice of HAL surface that wc_ecc_*() and + * the local HAL_PKA_* shims reference. Kept private to this translation + * unit so they don't collide with ST HAL headers in projects that include + * those for non-crypto code. */ +typedef enum { + HAL_OK = 0x00U, + HAL_ERROR = 0x01U, + HAL_BUSY = 0x02U, + HAL_TIMEOUT = 0x03U +} HAL_StatusTypeDef; + +#ifndef HAL_MAX_DELAY +#define HAL_MAX_DELAY 0xFFFFFFFFU +#endif + +typedef struct { + PKA_TypeDef *Instance; + /* V2 PKA clobbers RAM[PKA_ECDSA_SIGN_IN_MOD_NB_BITS] during the + * sign operation -- it cannot be read back to determine the + * result size. Mirror the HAL handle and save the modulus size + * (in bytes) at sign-setup time so GetResult can use it. V1 HAL + * reads from RAM and works fine; V2 HAL keeps it on the handle. 
*/ + uint32_t primeordersize; +} PKA_HandleTypeDef; + +typedef struct { + uint32_t modulusSize; + uint32_t coefSign; + const uint8_t *coefA; + const uint8_t *coefB; /* V2 only */ + const uint8_t *modulus; + const uint8_t *primeOrder; /* V2 only */ + uint32_t scalarMulSize; + const uint8_t *scalarMul; + const uint8_t *pointX; + const uint8_t *pointY; +} PKA_ECCMulInTypeDef; + +typedef struct { + uint8_t *ptX; + uint8_t *ptY; +} PKA_ECCMulOutTypeDef; + +typedef struct { + uint32_t primeOrderSize; + uint32_t modulusSize; + uint32_t coefSign; + const uint8_t *coef; + const uint8_t *coefB; /* V2 only */ + const uint8_t *modulus; + const uint8_t *basePointX; + const uint8_t *basePointY; + const uint8_t *primeOrder; + const uint8_t *pPubKeyCurvePtX; + const uint8_t *pPubKeyCurvePtY; + const uint8_t *RSign; + const uint8_t *SSign; + const uint8_t *hash; +} PKA_ECDSAVerifInTypeDef; + +typedef struct { + uint32_t primeOrderSize; + uint32_t modulusSize; + uint32_t coefSign; + const uint8_t *coef; + const uint8_t *coefB; /* V2 only */ + const uint8_t *modulus; + const uint8_t *basePointX; + const uint8_t *basePointY; + const uint8_t *primeOrder; + const uint8_t *hash; + const uint8_t *integer; + const uint8_t *privateKey; +} PKA_ECDSASignInTypeDef; + +typedef struct { + uint8_t *RSign; + uint8_t *SSign; +} PKA_ECDSASignOutTypeDef; + +typedef struct { + uint8_t *ptX; + uint8_t *ptY; +} PKA_ECDSASignOutExtParamTypeDef; + +#endif /* WOLFSSL_STM32_BARE && WOLFSSL_STM32_PKA */ + +#ifdef WOLFSSL_STM32_BARE +/* Provide the global PKA handle that the wc_ecc_mulmod_ex2() and + * stm32_ecc_*_hash_ex() paths reference via &hpka. Under HAL builds, + * the application supplies this; under BARE we own it (file-local). 
*/ +static PKA_HandleTypeDef hpka = { 0 }; +#else extern PKA_HandleTypeDef hpka; +#endif #if !defined(WOLFSSL_STM32_PKA_V2) && defined(PKA_ECC_SCALAR_MUL_IN_B_COEFF) /* PKA hardware like in U5 added coefB and primeOrder */ @@ -85,6 +196,7 @@ extern PKA_HandleTypeDef hpka; #ifdef HAVE_ECC #include +#include #ifndef WOLFSSL_HAVE_ECC_KEY_GET_PRIV /* FIPS build has replaced ecc.h. */ @@ -92,6 +204,640 @@ extern PKA_HandleTypeDef hpka; #define WOLFSSL_HAVE_ECC_KEY_GET_PRIV #endif #endif /* HAVE_ECC */ + +/* Bare-metal HAL_PKA_* shims -- direct-register slice of ST HAL surface + * used by the wolfssl PKA path. V1 layout (WB55/WL/MP13); V2 PKA (H5/ + * U5+PKA/WBA) adds coefB / primeOrder / pointCheck slots at different + * offsets but shares the start sequence and SR/CLRFR bit names, so the + * V2 differences fold into the same code path under WOLFSSL_STM32_PKA_V2 + * (auto-set when the CMSIS header defines PKA_ECC_SCALAR_MUL_IN_B_COEFF). + * Reference: STM32WBxx_HAL_Driver/Src/stm32wbxx_hal_pka.c. */ +#ifdef WOLFSSL_STM32_BARE + +/* PKA RAM occupies addresses PKA_BASE+0x400 .. PKA_BASE+0x11F4 on V1 and + * a slightly larger window on V2. The CMSIS device header sizes the + * RAM[] array correctly for the part. */ +#ifndef PKA_RAM_PARAM_END +/* The HAL macro `__PKA_RAM_PARAM_END(TAB,IDX)` differs by PKA IP rev: + * - V1 PKA (WB / WL / L5): writes a single zero word at IDX. + * - V2 PKA (WBA / U5 / H5 / N6 / C5 / H7S): writes TWO consecutive + * zero words at IDX and IDX+1. + * On V1 the operand RAM slots are packed tightly and a stray second + * zero overwrites the first word of the next operand -- on WL55 this + * silently corrupts ECDSA sign input (HASH_E or PRIVATE_KEY_D depending + * on which operand is being terminated), producing R/S that don't + * verify against their own pubkey. On V2 the slot spacing is wider and + * the spec requires the double-zero terminator (the PKA microcode + * scans until it sees two zeros). 
+ * + * Match the HAL flow exactly by gating on WOLFSSL_STM32_PKA_V2. */ +#ifdef WOLFSSL_STM32_PKA_V2 +#define PKA_RAM_PARAM_END(RAM, IDX) \ + do { \ + (RAM)[(IDX)] = 0UL; \ + (RAM)[(IDX) + 1U] = 0UL; \ + } while (0) +#else +#define PKA_RAM_PARAM_END(RAM, IDX) \ + do { (RAM)[(IDX)] = 0UL; } while (0) +#endif +#endif + +/* Mode encoding constants (from stm32wbxx_hal_pka.h and equivalent). + * Same numeric values across V1 and V2. */ +#ifndef PKA_MODE_ECC_MUL +#define PKA_MODE_ECC_MUL (0x00000020U) +#endif +#ifndef PKA_MODE_ECDSA_VERIFICATION +#define PKA_MODE_ECDSA_VERIFICATION (0x00000026U) +#endif +#ifndef PKA_MODE_ECDSA_SIGNATURE +#define PKA_MODE_ECDSA_SIGNATURE (0x00000024U) +#endif + +/* Success-code sentinel for RAM[PKA_ECDSA_SIGN_OUT_ERROR] and + * RAM[PKA_ECDSA_VERIF_OUT_RESULT]. V1 PKA (WB / WL / L5) uses 0 == OK. + * V2 PKA (WBA / U5 / H5 / N6 / C5 / H7S) uses 0xD60D == PKA_NO_ERROR; + * 0 is NOT success on V2. Other documented V2 error codes: + * 0xCBC9 = PKA_FAILED_COMPUTATION + * 0xA3B7 = PKA_RPART_SIGNATURE_NULL + * 0xF946 = PKA_SPART_SIGNATURE_NULL + */ +#ifdef WOLFSSL_STM32_PKA_V2 +#define WC_STM32_PKA_OK_CODE 0xD60DUL +#else +#define WC_STM32_PKA_OK_CODE 0UL +#endif + +/* Number of word slots in the PKA RAM array (per the CMSIS device + * header; e.g. 894 on WB55 V1). */ +#define WC_STM32_PKA_RAM_WORDS \ + (sizeof(((PKA_TypeDef*)0)->RAM) / sizeof(((PKA_TypeDef*)0)->RAM[0])) + +/* Big-endian byte buffer -> PKA RAM (little-endian word order). The + * destination is the PKA RAM slot indexed by 'word_idx'; n is the byte + * count of the source. Mirrors PKA_Memcpy_u8_to_u32 in the HAL. 
*/ +static void wc_stm32_pka_load_be(volatile uint32_t* dst, const uint8_t* src, + uint32_t n) +{ + uint32_t index = 0; + /* NULL source or destination: nothing is written. */ if (dst == NULL || src == NULL) return; + + /* Whole words first: dst[0] is built from the last four src bytes. */ for (; index < (n / 4U); index++) { + dst[index] = + ((uint32_t)src[(n - (index * 4U) - 1U)]) | + ((uint32_t)src[(n - (index * 4U) - 2U)] << 8) | + ((uint32_t)src[(n - (index * 4U) - 3U)] << 16) | + ((uint32_t)src[(n - (index * 4U) - 4U)] << 24); + } + /* Remaining 1-3 leading src bytes fill the low bytes of one more word. */ if ((n % 4U) == 1U) { + dst[index] = (uint32_t)src[(n - (index * 4U) - 1U)]; + } + else if ((n % 4U) == 2U) { + dst[index] = + ((uint32_t)src[(n - (index * 4U) - 1U)]) | + ((uint32_t)src[(n - (index * 4U) - 2U)] << 8); + } + else if ((n % 4U) == 3U) { + dst[index] = + ((uint32_t)src[(n - (index * 4U) - 1U)]) | + ((uint32_t)src[(n - (index * 4U) - 2U)] << 8) | + ((uint32_t)src[(n - (index * 4U) - 3U)] << 16); + } +} + +/* Load an operand into the PKA RAM at `slot` and append the PARAM_END + * terminator immediately after it: PKA_RAM_PARAM_END() writes one zero + * word on V1 PKA and two on V2 (WOLFSSL_STM32_PKA_V2). The terminator + * starts at word index slot + ceil(bytes / 4). Combines + * wc_stm32_pka_load_be with PKA_RAM_PARAM_END(), which appear paired at + * every operand-load site in HAL_PKA_ECCMul / ECDSAVerif / ECDSASign. */ +static void wc_stm32_pka_load_param_be(volatile uint32_t* ram, uint32_t slot, + const uint8_t* src, uint32_t bytes) +{ + wc_stm32_pka_load_be(&ram[slot], src, bytes); + PKA_RAM_PARAM_END(ram, slot + ((bytes + 3U) / 4U)); +} + +/* Forward declaration: wc_stm32_pka_ensure_init() is defined further + * down, after HAL_PKA_Init which it calls, but wc_stm32_pka_prep_ram() + * just below already needs to call it. */ +static HAL_StatusTypeDef wc_stm32_pka_ensure_init(PKA_HandleTypeDef *hpkah); + +/* Common preamble for the PKA setup entries (ECCMul / ECDSAVerif / + * ECDSASign): NULL-guard `hpkah`, run ensure_init, NULL-guard the + * resolved instance, and hand back the RAM pointer. Returns NULL on + * any failure -- caller maps NULL -> HAL_ERROR.
*/ +static volatile uint32_t* wc_stm32_pka_prep_ram(PKA_HandleTypeDef* hpkah) +{ + HAL_StatusTypeDef st; + if (hpkah == NULL) return NULL; + /* Lazily bring the PKA up; a failed init is reported as NULL. */ + st = wc_stm32_pka_ensure_init(hpkah); + if (st != HAL_OK) { +#ifdef WC_STM32_PKA_DIAG + printf("PKA prep_ram init failed=%d\n", (int)st); +#endif + return NULL; + } + if (hpkah->Instance == NULL) { +#ifdef WC_STM32_PKA_DIAG + printf("PKA prep_ram Instance NULL\n"); +#endif + return NULL; + } + /* Word view: STM32C5 types PKA RAM as uint8_t[]; others as uint32_t[]. + * Cast through void* exactly like the GetResult / IsValidSignature + * readers so the operand loaders always index 32-bit words. */ + return (volatile uint32_t*)(void*)hpkah->Instance->RAM; +} + +/* PKA RAM (little-endian word order) -> big-endian byte buffer. + * `n` is the number of bytes to produce: src[0] supplies the last four + * bytes of dst, and a trailing partial word (n % 4 bytes) fills the + * first bytes of dst. No-op when either pointer is NULL. */ +static void wc_stm32_pka_read_be(uint8_t* dst, volatile const uint32_t* src, + uint32_t n) +{ + uint32_t i = 0; + if (dst == NULL || src == NULL) return; + + for (; i < (n / 4U); i++) { + uint32_t off = n - 4U - (i * 4U); + dst[off + 3U] = (uint8_t)((src[i] ) & 0xFFU); + dst[off + 2U] = (uint8_t)((src[i] >> 8) & 0xFFU); + dst[off + 1U] = (uint8_t)((src[i] >> 16) & 0xFFU); + dst[off + 0U] = (uint8_t)((src[i] >> 24) & 0xFFU); + } + if ((n % 4U) == 1U) { + dst[0U] = (uint8_t)(src[i] & 0xFFU); + } + else if ((n % 4U) == 2U) { + dst[1U] = (uint8_t)((src[i] ) & 0xFFU); + dst[0U] = (uint8_t)((src[i] >> 8) & 0xFFU); + } + else if ((n % 4U) == 3U) { + dst[2U] = (uint8_t)((src[i] ) & 0xFFU); + dst[1U] = (uint8_t)((src[i] >> 8) & 0xFFU); + dst[0U] = (uint8_t)((src[i] >> 16) & 0xFFU); + } +} + +/* Optimal bit-size: bytes * 8 minus the leading-zero count of the MSB + * (matches PKA_GetOptBitSize_u8 in the HAL).
*/ +static uint32_t wc_stm32_pka_optbits(uint32_t byteNumber, uint8_t msb) +{ + uint32_t pos = 0; + uint32_t v = msb; + while (v != 0U) { + v >>= 1; + pos++; + } + if (byteNumber == 0U) { + return 0U; + } + return ((byteNumber - 1U) * 8U) + pos; +} + +#ifndef WC_STM32_PKA_INIT_TIMEOUT + #define WC_STM32_PKA_INIT_TIMEOUT 0x40000 +#endif + +static HAL_StatusTypeDef HAL_PKA_Init(PKA_HandleTypeDef *hpkah) +{ + uint32_t t; + + if (hpkah == NULL) { + return HAL_ERROR; + } + if (hpkah->Instance == NULL) { + hpkah->Instance = PKA; + } + +#ifdef WC_STM32_PKA_CLK_ENABLE + WC_STM32_PKA_CLK_ENABLE(); +#endif + +#if defined(WOLFSSL_STM32C5) && defined(RCC_AHB2RSTR_PKARST) + /* C5A3 silicon (REV_ID=0x2000 in our hand): the PKA IP after the + * first clock-enable comes up in a state where SR.INITOK never + * asserts (CR.EN sticks at 1, SR stays 0x00). The HAL works around + * this with an explicit RCC reset pulse around the first init. + * Cycle AHB2RSTR.PKARST before driving CR.EN -- this clears + * whatever latent state blocks the RAM-erase / self-check from + * completing. Same workaround pattern used for the C5 RNG NIST + * init in random.c. Other V2 PKA chips don't need this; gated on + * WOLFSSL_STM32C5. */ + RCC->AHB2RSTR |= RCC_AHB2RSTR_PKARST; + (void)RCC->AHB2RSTR; + RCC->AHB2RSTR &= ~RCC_AHB2RSTR_PKARST; + (void)RCC->AHB2RSTR; +#endif + + /* Enable the PKA. On L5 / U5 / H5 and friends the IP runs an + * automatic PKA-RAM erase after the first clock-enable; writes to + * CR.EN are silently dropped until the erase completes. Mirror the + * HAL behaviour and spin writing EN until the readback sticks. + * On timeout, clear the Instance pointer so wc_stm32_pka_ensure_init + * will retry on the next call instead of running ops against a + * still-disabled IP. 
*/ + t = 0; + while ((hpkah->Instance->CR & PKA_CR_EN) != PKA_CR_EN) { + hpkah->Instance->CR = PKA_CR_EN; + if (++t >= WC_STM32_PKA_INIT_TIMEOUT) { +#ifdef WC_STM32_PKA_DIAG + printf("PKA Init CR.EN timeout CR=%lx SR=%lx\n", + (unsigned long)hpkah->Instance->CR, + (unsigned long)hpkah->Instance->SR); +#endif + hpkah->Instance = NULL; + return HAL_TIMEOUT; + } + } + +#ifdef PKA_SR_INITOK + /* V2 PKA additionally exposes an INITOK status flag in SR that is + * set when the RAM-erase + self-check sequence completes. The V2 + * HAL_PKA_Init waits for INITOK before returning. Without this + * wait, an immediate ECDSA SIGN can race the init and silently + * fail with OUT_ERROR = 0xCBC9 (PKA_FAILED_COMPUTATION) on U5 / H5 + * / WBA / N6 / C5 / H7S. V1 PKA does not have INITOK and the bit + * is undefined there. */ + t = 0; + while ((hpkah->Instance->SR & PKA_SR_INITOK) == 0U) { + if (++t >= WC_STM32_PKA_INIT_TIMEOUT) { +#ifdef WC_STM32_PKA_DIAG + printf("PKA Init INITOK timeout CR=%lx SR=%lx\n", + (unsigned long)hpkah->Instance->CR, + (unsigned long)hpkah->Instance->SR); +#endif + hpkah->Instance = NULL; + return HAL_TIMEOUT; + } + } +#endif + + /* Clear any pending flags. */ + hpkah->Instance->CLRFR = PKA_CLRFR_PROCENDFC | PKA_CLRFR_RAMERRFC | + PKA_CLRFR_ADDRERRFC; + return HAL_OK; +} + +/* Lazy one-shot init helper. Safe to call from every entry point. + * Returns HAL_OK if the PKA is ready, HAL_ERROR / HAL_TIMEOUT otherwise. + * On failure HAL_PKA_Init resets hpkah->Instance back to NULL so the + * next call retries instead of running ops against a disabled IP. 
*/ +static HAL_StatusTypeDef wc_stm32_pka_ensure_init(PKA_HandleTypeDef *hpkah) +{ + if (hpkah == NULL) return HAL_ERROR; + if (hpkah->Instance == NULL) { + return HAL_PKA_Init(hpkah); + } + return HAL_OK; +} + +static void HAL_PKA_RAMReset(PKA_HandleTypeDef *hpkah) +{ + uint32_t i; + if (hpkah == NULL || hpkah->Instance == NULL) return; + for (i = 0; i < WC_STM32_PKA_RAM_WORDS; i++) { + hpkah->Instance->RAM[i] = 0UL; + } +} + +/* Generic start-and-poll sequence with bounded timeout. The default + * spin budget covers a P-521 scalar mul on a slow PKA (worst case on + * the parts wolfSSL targets is ~2 sec; the budget here is well above + * that). Override at compile time via WC_STM32_PKA_TIMEOUT_LOOPS. */ +#ifndef WC_STM32_PKA_TIMEOUT_LOOPS +#define WC_STM32_PKA_TIMEOUT_LOOPS 0x10000000U +#endif + +static HAL_StatusTypeDef wc_stm32_pka_process(PKA_HandleTypeDef *hpkah, + uint32_t mode) +{ + PKA_TypeDef *p; + uint32_t cr, t; + + if (hpkah == NULL || hpkah->Instance == NULL) { + return HAL_ERROR; + } + p = hpkah->Instance; + + /* PKA must be enabled before MODE/START are written. */ + if ((p->CR & PKA_CR_EN) == 0U) { + p->CR = PKA_CR_EN; + } + + /* Update the mode field in CR; clear ALL interrupt enables including + * OPERRIE (operation-error) on V2 PKA. The HAL MODIFY_REG clears + * PROCENDIE | RAMERRIE | ADDRERRIE | OPERRIE -- missing OPERRIE was + * harmless under polling but inconsistent with the HAL flow. */ + cr = p->CR; + cr &= ~(PKA_CR_MODE | PKA_CR_PROCENDIE | PKA_CR_RAMERRIE | + PKA_CR_ADDRERRIE); +#ifdef PKA_CR_OPERRIE + cr &= ~PKA_CR_OPERRIE; +#endif + cr |= (mode << PKA_CR_MODE_Pos) & PKA_CR_MODE; + p->CR = cr; + __DMB(); + + /* Clear any status flags left over from a prior operation before + * starting this one -- matches the HAL, which clears all of + * PROCENDF / RAMERRF / ADDRERRF / OPERRF. In particular OPERRF is + * sticky on some V2 PKA silicon (e.g. 
STM32H5): a prior operation + * can latch OPERRF, and because the end-of-op cleanup below does not + * clear it, the next operation's poll would see the stale OPERRF and + * abort immediately (reported as WC_HW_E). */ + p->CLRFR = PKA_CLRFR_PROCENDFC | PKA_CLRFR_RAMERRFC | PKA_CLRFR_ADDRERRFC; +#ifdef PKA_CLRFR_OPERRFC + p->CLRFR = PKA_CLRFR_OPERRFC; +#endif + __DMB(); + + /* Start the operation. */ + p->CR = cr | PKA_CR_START; + __DMB(); + + /* Wait for end-of-operation flag, OR an error flag, OR timeout. + * Also watch OPERRF on V2 PKA -- the IP silently rejects invalid + * operand combinations with OPERRF=1 + BUSY=0 + PROCENDF=0, which + * looks like a hang to a poller that only watches PROCENDF/RAMERRF/ + * ADDRERRF. */ + t = 0; + while ((p->SR & PKA_SR_PROCENDF) == 0U) { + uint32_t err_mask = PKA_SR_RAMERRF | PKA_SR_ADDRERRF; +#ifdef PKA_SR_OPERRF + err_mask |= PKA_SR_OPERRF; +#endif + if ((p->SR & err_mask) != 0U) { +#ifdef WC_STM32_PKA_DIAG + printf("PKA err mode=%lx CR=%lx SR=%lx\n", + (unsigned long)mode, (unsigned long)p->CR, + (unsigned long)p->SR); +#endif + p->CLRFR = PKA_CLRFR_PROCENDFC | PKA_CLRFR_RAMERRFC | + PKA_CLRFR_ADDRERRFC; +#ifdef PKA_CLRFR_OPERRFC + p->CLRFR = PKA_CLRFR_OPERRFC; +#endif + return HAL_ERROR; + } + if (++t >= WC_STM32_PKA_TIMEOUT_LOOPS) { +#ifdef WC_STM32_PKA_DIAG + printf("PKA timeout mode=%lx CR=%lx SR=%lx\n", + (unsigned long)mode, (unsigned long)p->CR, + (unsigned long)p->SR); +#endif + p->CLRFR = PKA_CLRFR_PROCENDFC | PKA_CLRFR_RAMERRFC | + PKA_CLRFR_ADDRERRFC; + return HAL_TIMEOUT; + } + } + + /* Clear all status flags. 
*/ + p->CLRFR = PKA_CLRFR_PROCENDFC | PKA_CLRFR_RAMERRFC | PKA_CLRFR_ADDRERRFC; + + return HAL_OK; +} + +static HAL_StatusTypeDef HAL_PKA_ECCMul(PKA_HandleTypeDef *hpkah, + PKA_ECCMulInTypeDef *in, uint32_t Timeout) +{ + volatile uint32_t *RAM; + + (void)Timeout; + if (in == NULL) return HAL_ERROR; + RAM = wc_stm32_pka_prep_ram(hpkah); + if (RAM == NULL) return HAL_ERROR; + + /* Scalar 'k' bit length, modulus bit length, and 'a' coefficient + * sign indicator -- exactly as the HAL writes them. The HAL takes + * the leading byte of the curve ORDER (not the scalar itself) when + * computing the optimal scalar bit-size on V2 PKA; a small scalar + * with a zero MSB byte would otherwise report 8 fewer bits than + * required and the IP accepts the operation but PROCENDF never + * asserts (timeout, SR=INITOK only). */ + RAM[PKA_ECC_SCALAR_MUL_IN_EXP_NB_BITS] = +#ifdef WOLFSSL_STM32_PKA_V2 + (in->primeOrder != NULL) ? + wc_stm32_pka_optbits(in->scalarMulSize, *(in->primeOrder)) : +#endif + wc_stm32_pka_optbits(in->scalarMulSize, *(in->scalarMul)); + RAM[PKA_ECC_SCALAR_MUL_IN_OP_NB_BITS] = + wc_stm32_pka_optbits(in->modulusSize, *(in->modulus)); + RAM[PKA_ECC_SCALAR_MUL_IN_A_COEFF_SIGN] = in->coefSign; + + /* Match the V2 HAL's RAM write order EXACTLY: + * A_COEFF, B_COEFF, MOD_GF, K, INITIAL_POINT_X, INITIAL_POINT_Y, + * N_PRIME_ORDER. + * (V1 PKA has no B_COEFF / N_PRIME_ORDER -- skip those slots.) + * The RAM slots have disjoint addresses so write order shouldn't + * matter in theory, but the V2 PKA IP appears to latch SOME state + * from the write sequence that produces the PROCENDF-never-asserts + * symptom on every V2 chip if the order differs from the HAL. 
*/ + wc_stm32_pka_load_param_be(RAM, PKA_ECC_SCALAR_MUL_IN_A_COEFF, + in->coefA, in->modulusSize); +#ifdef WOLFSSL_STM32_PKA_V2 + if (in->coefB != NULL) { + wc_stm32_pka_load_param_be(RAM, PKA_ECC_SCALAR_MUL_IN_B_COEFF, + in->coefB, in->modulusSize); + } +#endif + wc_stm32_pka_load_param_be(RAM, PKA_ECC_SCALAR_MUL_IN_MOD_GF, + in->modulus, in->modulusSize); + wc_stm32_pka_load_param_be(RAM, PKA_ECC_SCALAR_MUL_IN_K, + in->scalarMul, in->scalarMulSize); + wc_stm32_pka_load_param_be(RAM, PKA_ECC_SCALAR_MUL_IN_INITIAL_POINT_X, + in->pointX, in->modulusSize); + wc_stm32_pka_load_param_be(RAM, PKA_ECC_SCALAR_MUL_IN_INITIAL_POINT_Y, + in->pointY, in->modulusSize); +#ifdef WOLFSSL_STM32_PKA_V2 + if (in->primeOrder != NULL) { + wc_stm32_pka_load_param_be(RAM, PKA_ECC_SCALAR_MUL_IN_N_PRIME_ORDER, + in->primeOrder, in->modulusSize); + } +#endif /* WOLFSSL_STM32_PKA_V2 */ + + return wc_stm32_pka_process(hpkah, PKA_MODE_ECC_MUL); +} + +static void HAL_PKA_ECCMul_GetResult(PKA_HandleTypeDef *hpkah, + PKA_ECCMulOutTypeDef *out) +{ + uint32_t size; + volatile const uint32_t *RAM; + + if (hpkah == NULL || hpkah->Instance == NULL || out == NULL) return; + /* Word view: STM32C5 types PKA RAM as uint8_t[]; others as uint32_t[]. */ + RAM = (volatile const uint32_t*)(void*)hpkah->Instance->RAM; + + /* The HAL recomputes the byte size from the saved IN_OP_NB_BITS + * slot. We do the same. 
*/ + size = (RAM[PKA_ECC_SCALAR_MUL_IN_OP_NB_BITS] + 7U) / 8U; + + if (out->ptX != NULL) { + wc_stm32_pka_read_be(out->ptX, + &RAM[PKA_ECC_SCALAR_MUL_OUT_RESULT_X], size); + } + if (out->ptY != NULL) { + wc_stm32_pka_read_be(out->ptY, + &RAM[PKA_ECC_SCALAR_MUL_OUT_RESULT_Y], size); + } +} + +static HAL_StatusTypeDef HAL_PKA_ECDSAVerif(PKA_HandleTypeDef *hpkah, + PKA_ECDSAVerifInTypeDef *in, uint32_t Timeout) +{ + volatile uint32_t *RAM; + + (void)Timeout; + if (in == NULL) return HAL_ERROR; + RAM = wc_stm32_pka_prep_ram(hpkah); + if (RAM == NULL) return HAL_ERROR; + + RAM[PKA_ECDSA_VERIF_IN_ORDER_NB_BITS] = + wc_stm32_pka_optbits(in->primeOrderSize, *(in->primeOrder)); + RAM[PKA_ECDSA_VERIF_IN_MOD_NB_BITS] = + wc_stm32_pka_optbits(in->modulusSize, *(in->modulus)); + RAM[PKA_ECDSA_VERIF_IN_A_COEFF_SIGN] = in->coefSign; + + wc_stm32_pka_load_param_be(RAM, PKA_ECDSA_VERIF_IN_A_COEFF, + in->coef, in->modulusSize); + wc_stm32_pka_load_param_be(RAM, PKA_ECDSA_VERIF_IN_MOD_GF, + in->modulus, in->modulusSize); + wc_stm32_pka_load_param_be(RAM, PKA_ECDSA_VERIF_IN_INITIAL_POINT_X, + in->basePointX, in->modulusSize); + wc_stm32_pka_load_param_be(RAM, PKA_ECDSA_VERIF_IN_INITIAL_POINT_Y, + in->basePointY, in->modulusSize); + wc_stm32_pka_load_param_be(RAM, PKA_ECDSA_VERIF_IN_PUBLIC_KEY_POINT_X, + in->pPubKeyCurvePtX, in->modulusSize); + wc_stm32_pka_load_param_be(RAM, PKA_ECDSA_VERIF_IN_PUBLIC_KEY_POINT_Y, + in->pPubKeyCurvePtY, in->modulusSize); + wc_stm32_pka_load_param_be(RAM, PKA_ECDSA_VERIF_IN_SIGNATURE_R, + in->RSign, in->primeOrderSize); + wc_stm32_pka_load_param_be(RAM, PKA_ECDSA_VERIF_IN_SIGNATURE_S, + in->SSign, in->primeOrderSize); + wc_stm32_pka_load_param_be(RAM, PKA_ECDSA_VERIF_IN_HASH_E, + in->hash, in->primeOrderSize); + wc_stm32_pka_load_param_be(RAM, PKA_ECDSA_VERIF_IN_ORDER_N, + in->primeOrder, in->primeOrderSize); + + return wc_stm32_pka_process(hpkah, PKA_MODE_ECDSA_VERIFICATION); +} + +static uint32_t HAL_PKA_ECDSAVerif_IsValidSignature( + 
PKA_HandleTypeDef const *const hpkah) +{ + if (hpkah == NULL || hpkah->Instance == NULL) return 0U; + /* IP-rev-aware success check -- see WC_STM32_PKA_OK_CODE definition. + * Word view: STM32C5 types PKA RAM as uint8_t[]; others as uint32_t[]. */ + return (((volatile const uint32_t*)(void*)hpkah->Instance->RAM) + [PKA_ECDSA_VERIF_OUT_RESULT] == WC_STM32_PKA_OK_CODE) ? 1U : 0U; +} + +static HAL_StatusTypeDef HAL_PKA_ECDSASign(PKA_HandleTypeDef *hpkah, + PKA_ECDSASignInTypeDef *in, uint32_t Timeout) +{ + volatile uint32_t *RAM; + HAL_StatusTypeDef st; + + (void)Timeout; + if (in == NULL) return HAL_ERROR; + RAM = wc_stm32_pka_prep_ram(hpkah); + if (RAM == NULL) return HAL_ERROR; + + /* Capture sizes on the handle BEFORE the operation -- V2 PKA + * clobbers RAM[MOD_NB_BITS] during compute. GetResult reads from + * the handle on V2 (matches HAL behaviour). */ + hpkah->primeordersize = in->modulusSize; + RAM[PKA_ECDSA_SIGN_IN_ORDER_NB_BITS] = + wc_stm32_pka_optbits(in->primeOrderSize, *(in->primeOrder)); + RAM[PKA_ECDSA_SIGN_IN_MOD_NB_BITS] = + wc_stm32_pka_optbits(in->modulusSize, *(in->modulus)); + RAM[PKA_ECDSA_SIGN_IN_A_COEFF_SIGN] = in->coefSign; + + wc_stm32_pka_load_param_be(RAM, PKA_ECDSA_SIGN_IN_A_COEFF, + in->coef, in->modulusSize); +#ifdef WOLFSSL_STM32_PKA_V2 + /* V2 PKA ECDSA SIGN requires the curve `b` coefficient loaded + * between A_COEFF and MOD_GF. V1 PKA has no B_COEFF slot for sign. + * Without B_COEFF the V2 sign operation reports + * OUT_ERROR = 0xCBC9 (PKA_FAILED_COMPUTATION) and aborts. The HAL + * `PKA_ECDSASign_Set` writes this between A_COEFF and MOD_GF. 
*/ + if (in->coefB != NULL) { + wc_stm32_pka_load_param_be(RAM, PKA_ECDSA_SIGN_IN_B_COEFF, + in->coefB, in->modulusSize); + } +#endif + wc_stm32_pka_load_param_be(RAM, PKA_ECDSA_SIGN_IN_MOD_GF, + in->modulus, in->modulusSize); + wc_stm32_pka_load_param_be(RAM, PKA_ECDSA_SIGN_IN_K, + in->integer, in->primeOrderSize); + wc_stm32_pka_load_param_be(RAM, PKA_ECDSA_SIGN_IN_INITIAL_POINT_X, + in->basePointX, in->modulusSize); + wc_stm32_pka_load_param_be(RAM, PKA_ECDSA_SIGN_IN_INITIAL_POINT_Y, + in->basePointY, in->modulusSize); + wc_stm32_pka_load_param_be(RAM, PKA_ECDSA_SIGN_IN_HASH_E, + in->hash, in->primeOrderSize); + wc_stm32_pka_load_param_be(RAM, PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D, + in->privateKey, in->primeOrderSize); + wc_stm32_pka_load_param_be(RAM, PKA_ECDSA_SIGN_IN_ORDER_N, + in->primeOrder, in->primeOrderSize); + + st = wc_stm32_pka_process(hpkah, PKA_MODE_ECDSA_SIGNATURE); + if (st != HAL_OK) { + return st; + } + /* Sign reports failure via PKA_ECDSA_SIGN_OUT_ERROR != OK_CODE + * (e.g. unsuitable random k -- caller is expected to retry). + * See WC_STM32_PKA_OK_CODE for the V1/V2 sentinel divergence. */ + { + uint32_t err_code = RAM[PKA_ECDSA_SIGN_OUT_ERROR]; + if (err_code != WC_STM32_PKA_OK_CODE) { +#ifdef WC_STM32_PKA_DIAG + printf("PKA sign OUT_ERROR=%lx\n", (unsigned long)err_code); +#endif + return HAL_ERROR; + } + } + return HAL_OK; +} + +static void HAL_PKA_ECDSASign_GetResult(PKA_HandleTypeDef *hpkah, + PKA_ECDSASignOutTypeDef *out, + PKA_ECDSASignOutExtParamTypeDef *outExt) +{ + uint32_t size; + volatile const uint32_t *RAM; + + if (hpkah == NULL || hpkah->Instance == NULL) return; + /* Word view: STM32C5 types PKA RAM as uint8_t[]; others as uint32_t[]. */ + RAM = (volatile const uint32_t*)(void*)hpkah->Instance->RAM; + /* V2 PKA clobbers RAM[MOD_NB_BITS] during compute; use the size + * saved on the handle. V1 still reads from RAM. 
*/ +#ifdef WOLFSSL_STM32_PKA_V2 + size = hpkah->primeordersize; +#else + size = (RAM[PKA_ECDSA_SIGN_IN_MOD_NB_BITS] + 7U) / 8U; +#endif + + if (out != NULL) { + if (out->RSign != NULL) { + wc_stm32_pka_read_be(out->RSign, + &RAM[PKA_ECDSA_SIGN_OUT_SIGNATURE_R], size); + } + if (out->SSign != NULL) { + wc_stm32_pka_read_be(out->SSign, + &RAM[PKA_ECDSA_SIGN_OUT_SIGNATURE_S], size); + } + } + if (outExt != NULL) { + if (outExt->ptX != NULL) { + wc_stm32_pka_read_be(outExt->ptX, + &RAM[PKA_ECDSA_SIGN_OUT_FINAL_POINT_X], size); + } + if (outExt->ptY != NULL) { + wc_stm32_pka_read_be(outExt->ptY, + &RAM[PKA_ECDSA_SIGN_OUT_FINAL_POINT_Y], size); + } + } +} + +#endif /* WOLFSSL_STM32_BARE */ + #endif /* WOLFSSL_STM32_PKA */ @@ -99,11 +845,19 @@ extern PKA_HandleTypeDef hpka; /* #define DEBUG_STM32_HASH */ +#if defined(WOLFSSL_STM32_BARE) && !defined(WC_STM32_HASH_CLK_ENABLE) + #error "WOLFSSL_STM32_BARE: HASH clock-enable not mapped for this STM32 \ + family. Add WC_STM32_HASH_CLK_ENABLE() to \ + wolfssl/wolfcrypt/port/st/stm32.h, or define NO_STM32_HASH." 
+#endif + /* User can override STM32_HASH_CLOCK_ENABLE and STM32_HASH_CLOCK_DISABLE */ #ifndef STM32_HASH_CLOCK_ENABLE static WC_INLINE void wc_Stm32_Hash_Clock_Enable(STM32_HASH_Context* stmCtx) { - #ifdef WOLFSSL_STM32_CUBEMX + #if defined(WOLFSSL_STM32_BARE) + WC_STM32_HASH_CLK_ENABLE(); + #elif defined(WOLFSSL_STM32_CUBEMX) __HAL_RCC_HASH_CLK_ENABLE(); #else RCC_AHB2PeriphClockCmd(RCC_AHB2Periph_HASH, ENABLE); @@ -116,7 +870,9 @@ extern PKA_HandleTypeDef hpka; #ifndef STM32_HASH_CLOCK_DISABLE static WC_INLINE void wc_Stm32_Hash_Clock_Disable(STM32_HASH_Context* stmCtx) { - #ifdef WOLFSSL_STM32_CUBEMX + #if defined(WOLFSSL_STM32_BARE) + WC_STM32_HASH_CLK_DISABLE(); + #elif defined(WOLFSSL_STM32_CUBEMX) __HAL_RCC_HASH_CLK_DISABLE(); #else RCC_AHB2PeriphClockCmd(RCC_AHB2Periph_HASH, DISABLE); @@ -224,9 +980,18 @@ static void wc_Stm32_Hash_GetDigest(byte* hash, int digestSize) sz = digestSize; while (sz > 0) { - /* first 20 bytes come from instance HR */ + /* first 20 bytes come from the instance digest registers. The + * new-generation HASH IP (gated via WC_STM32_HASH_INSTANCE_HRA + * in stm32.h based on the per-family CMSIS shape) renames this + * from `HR[5]` to `HRA[5]` and adds a separate `HASH_DIGEST->HR[16]` + * for the full digest; the legacy F4/F7/L4 layout still exposes + * `HR[5]` directly on the instance. */ if (i < 5) { + #ifdef WC_STM32_HASH_INSTANCE_HRA + digest[i] = HASH->HRA[i]; + #else digest[i] = HASH->HR[i]; + #endif } #ifdef HASH_DIGEST /* reset comes from HASH_DIGEST */ @@ -643,22 +1408,948 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, #ifdef STM32_CRYPTO #ifndef NO_AES -#ifdef WOLFSSL_STM32_CUBEMX +#ifdef WOLFSSL_STM32_BARE + +/* Only complain if the user actually asked for STM32 HW AES. + * `STM32_CRYPTO` is the umbrella enable; without it the BARE driver + * is dead code and missing clock-enable macros for the family are + * harmless (e.g. F767 / F303 / G491 ship NO_STM32_CRYPTO). 
*/ +#if defined(STM32_CRYPTO) && !defined(WC_STM32_AES_CLK_ENABLE) + #error "WOLFSSL_STM32_BARE: AES clock-enable not mapped for this STM32 \ + family. Add WC_STM32_AES_CLK_ENABLE() to \ + wolfssl/wolfcrypt/port/st/stm32.h, or define NO_STM32_CRYPTO." +#endif + +/* ===== Bare-metal direct-register AES driver ===== + * No HAL or StdPeriph. Two IP variants: + * - CRYP (FIFO-based): F2/F4/F7/H7/MP13 + * - AES/SAES (TinyAES): L4/L5/U5/H573/G0/G4/WB/WL/WBA/H7S(via SAES) + * + * H7S3 has both a "fat" CRYP (same register shape as H753) AND a + * TinyAES-shape SAES. ST's H7S Cube examples drive AES exclusively via + * SAES -- the plain CRYP is gated behind the security domain. The H7S + * arm therefore goes through the TinyAES branch with WC_STM32_AES_INST + * = SAES (forced via WOLFSSL_STM32_USE_SAES in the per-board settings). + * Variant selected via family ifdefs below. */ + +#if defined(WOLFSSL_STM32F2) || defined(WOLFSSL_STM32F4) || \ + defined(WOLFSSL_STM32F7) || defined(WOLFSSL_STM32H7) || \ + defined(WOLFSSL_STM32MP13) +/* ----- CRYP IP (FIFO-based) ----- */ + +#ifndef STM32_BARE_AES_TIMEOUT + #define STM32_BARE_AES_TIMEOUT 0x10000 +#endif + +/* DATATYPE = 10b (byte) so CRYP byte-swaps DR/DOUT for us; key/IV regs are + * still big-endian. Key arrives pre-reversed via wc_AesSetKey (aes.c:4161); + * IV is byte-reversed locally before write. 
*/ +#define STM32_CRYP_DATATYPE_BYTE CRYP_CR_DATATYPE_1 + +static int Stm32AesWaitBusy(void) +{ + int t = 0; + while ((CRYP->SR & CRYP_SR_BUSY) != 0) { + if (++t >= STM32_BARE_AES_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + return 0; +} + +static int Stm32AesWaitInNotFull(void) +{ + int t = 0; + while ((CRYP->SR & CRYP_SR_IFNF) == 0) { + if (++t >= STM32_BARE_AES_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + return 0; +} + +static int Stm32AesWaitOutNotEmpty(void) +{ + int t = 0; + while ((CRYP->SR & CRYP_SR_OFNE) == 0) { + if (++t >= STM32_BARE_AES_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + return 0; +} + +static word32 Stm32AesKeySizeBits(word32 keyLen) +{ + if (keyLen == 24) { + return CRYP_CR_KEYSIZE_0; /* 192-bit */ + } + if (keyLen == 32) { + return CRYP_CR_KEYSIZE_1; /* 256-bit */ + } + return 0; /* 128-bit */ +} + +/* aes->key is pre-byte-reversed by wc_AesSetKey under BARE (aes.c:4161), + * so the key words go straight into the K registers in big-endian form. */ +static void Stm32AesLoadKey(const word32* key, word32 keyLen) +{ + if (keyLen == 16) { + CRYP->K2LR = key[0]; CRYP->K2RR = key[1]; + CRYP->K3LR = key[2]; CRYP->K3RR = key[3]; + } + else if (keyLen == 24) { + CRYP->K1LR = key[0]; CRYP->K1RR = key[1]; + CRYP->K2LR = key[2]; CRYP->K2RR = key[3]; + CRYP->K3LR = key[4]; CRYP->K3RR = key[5]; + } + else { /* 32 */ + CRYP->K0LR = key[0]; CRYP->K0RR = key[1]; + CRYP->K1LR = key[2]; CRYP->K1RR = key[3]; + CRYP->K2LR = key[4]; CRYP->K2RR = key[5]; + CRYP->K3LR = key[6]; CRYP->K3RR = key[7]; + } +} + +/* aes->reg (IV) is NOT pre-reversed by wc_AesSetIV, so byte-reverse here so + * the IV registers see big-endian words. */ +static void Stm32AesLoadIV(const byte* iv, word32 ivLen) +{ + word32 v[4]; + word32 copyLen = (ivLen > 16) ? 
16 : ivLen; + + XMEMSET(v, 0, sizeof(v)); + if (iv != NULL && copyLen > 0) { + XMEMCPY(v, iv, copyLen); + ByteReverseWords(v, v, 16); + } + CRYP->IV0LR = v[0]; CRYP->IV0RR = v[1]; + CRYP->IV1LR = v[2]; CRYP->IV1RR = v[3]; +} + +/* Push 4 input words then drain 4 output words. */ +static int Stm32AesXferBlock(const byte* in, byte* out) +{ + int ret; + word32 i; + word32 buf[WC_AES_BLOCK_SIZE/sizeof(word32)]; + + /* Local word-aligned copy so callers may pass byte-aligned ptrs. */ + XMEMCPY(buf, in, WC_AES_BLOCK_SIZE); + + for (i = 0; i < 4; i++) { + ret = Stm32AesWaitInNotFull(); + if (ret != 0) { + return ret; + } + CRYP->DIN = buf[i]; + } + for (i = 0; i < 4; i++) { + ret = Stm32AesWaitOutNotEmpty(); + if (ret != 0) { + return ret; + } + buf[i] = CRYP->DOUT; + } + XMEMCPY(out, buf, WC_AES_BLOCK_SIZE); + return 0; +} + +/* CBC/ECB decrypt requires a key-prep pass first (per F4/H7 reference manual: + * load key, run ALGOMODE=AES_KEY, wait BUSY=0, then start the actual op). */ +static int Stm32AesPrepareKey(word32 keyLen) +{ + int ret; + + CRYP->CR = CRYP_CR_ALGOMODE_AES_KEY | + STM32_CRYP_DATATYPE_BYTE | + Stm32AesKeySizeBits(keyLen); + CRYP->CR |= CRYP_CR_CRYPEN; + ret = Stm32AesWaitBusy(); + CRYP->CR &= ~CRYP_CR_CRYPEN; + return ret; +} -#if defined(WOLFSSL_STM32U5_DHUK) -/* Set the DHUK IV to be used when unwrapping an AES key - * return 0 on success */ -int wc_Stm32_Aes_SetDHUK_IV(struct Aes* aes, const byte* iv, int ivSz) +int wc_Stm32_Aes_Ecb(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc) { - if (ivSz != sizeof(aes->dhukIV)) { + int ret; + word32 keyLen, blocks, b; + word32 cr; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + if (sz == 0 || (sz % WC_AES_BLOCK_SIZE) != 0) { return BAD_FUNC_ARG; } - XMEMCPY(aes->dhukIV, iv, ivSz); - aes->dhukIVLen = ivSz; + ret = wc_AesGetKeySize(aes, &keyLen); + if (ret != 0) { + return ret; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + 
WC_STM32_AES_CLK_ENABLE(); + + Stm32AesLoadKey(aes->key, keyLen); + if (!isEnc) { + ret = Stm32AesPrepareKey(keyLen); + if (ret != 0) { + goto exit; + } + } + + cr = CRYP_CR_ALGOMODE_AES_ECB | + STM32_CRYP_DATATYPE_BYTE | + Stm32AesKeySizeBits(keyLen); + if (!isEnc) { + cr |= CRYP_CR_ALGODIR; + } + CRYP->CR = cr; + CRYP->CR |= CRYP_CR_FFLUSH; + CRYP->CR |= CRYP_CR_CRYPEN; + + blocks = sz / WC_AES_BLOCK_SIZE; + for (b = 0; b < blocks; b++) { + ret = Stm32AesXferBlock(in + b * WC_AES_BLOCK_SIZE, + out + b * WC_AES_BLOCK_SIZE); + if (ret != 0) { + break; + } + } + +exit: + CRYP->CR &= ~CRYP_CR_CRYPEN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +int wc_Stm32_Aes_Cbc(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc) +{ + int ret; + word32 keyLen, blocks, b; + word32 cr; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + /* Match the SW / CUBEMX CBC backends: process whole blocks and ignore any + * sub-block remainder. (The bare wc_AesCbcEncrypt/Decrypt wrappers reject a + * non-block-multiple length with BAD_LENGTH_E only under + * WOLFSSL_AES_CBC_LENGTH_CHECKS.) 
*/ + blocks = sz / WC_AES_BLOCK_SIZE; + if (blocks == 0) { + return 0; + } + + ret = wc_AesGetKeySize(aes, &keyLen); + if (ret != 0) { + return ret; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + WC_STM32_AES_CLK_ENABLE(); + + Stm32AesLoadKey(aes->key, keyLen); + if (!isEnc) { + ret = Stm32AesPrepareKey(keyLen); + if (ret != 0) { + goto exit; + } + } + Stm32AesLoadIV((const byte*)aes->reg, WC_AES_BLOCK_SIZE); + + cr = CRYP_CR_ALGOMODE_AES_CBC | + STM32_CRYP_DATATYPE_BYTE | + Stm32AesKeySizeBits(keyLen); + if (!isEnc) { + cr |= CRYP_CR_ALGODIR; + } + CRYP->CR = cr; + CRYP->CR |= CRYP_CR_FFLUSH; + CRYP->CR |= CRYP_CR_CRYPEN; + + /* For in-place decrypt (out == in) the block loop overwrites the + * source ciphertext, so the next-IV ciphertext block (the last WHOLE + * block -- any sub-block remainder is ignored) is captured first. */ + if (!isEnc) { + XMEMCPY(aes->tmp, in + (blocks - 1) * WC_AES_BLOCK_SIZE, + WC_AES_BLOCK_SIZE); + } + + for (b = 0; b < blocks; b++) { + ret = Stm32AesXferBlock(in + b * WC_AES_BLOCK_SIZE, + out + b * WC_AES_BLOCK_SIZE); + if (ret != 0) { + break; + } + } + + if (ret == 0) { + /* Update aes->reg with new IV (last cipher block for enc; saved + * pre-loop ciphertext for dec). aes.c CBC dispatcher expects + * aes->reg updated for the next call. */ + if (isEnc) { + XMEMCPY(aes->reg, out + (blocks - 1) * WC_AES_BLOCK_SIZE, + WC_AES_BLOCK_SIZE); + } + else { + XMEMCPY(aes->reg, aes->tmp, WC_AES_BLOCK_SIZE); + } + } + +exit: + CRYP->CR &= ~CRYP_CR_CRYPEN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +/* CTR: handled via the ECB-as-transform path in aes.c (XTRANSFORM_AESCTRBLOCK). + * Each per-block ECB call comes through wc_Stm32_Aes_Ecb above; aes.c manages + * the counter and the XOR with plaintext. 
*/ + +/* === HW GCM (CRYP IP phase machine) ========================================== + * Native HW GCM for the case the CRYP IP supports directly: + * - IV is 96 bits (12 bytes) -- the standard GCM IV + * - AAD and PT lengths are whole 16-byte blocks (no partial last block) + * Returns CRYPTOCB_UNAVAILABLE for unsupported parameter combos, so the + * caller (aes.c BARE GCM dispatcher) falls back to SW GHASH + HW ECB. */ +static int Stm32AesXferDiscardOut(const byte* in) +{ + int ret; + word32 i; + word32 buf[WC_AES_BLOCK_SIZE/sizeof(word32)]; + + XMEMCPY(buf, in, WC_AES_BLOCK_SIZE); + for (i = 0; i < 4; i++) { + ret = Stm32AesWaitInNotFull(); + if (ret != 0) { + return ret; + } + CRYP->DIN = buf[i]; + } + return Stm32AesWaitBusy(); +} + +/* GCM init phase (GCMPH=00): caller has already written cr_base|phase=0 + * and loaded key/IV. FFLUSH + CRYPEN; wait for CRYPEN to auto-clear + * (H7-documented mechanism for end-of-init-phase; F4 behaves the same). */ +static int Stm32GcmInitPhase(void) +{ + int t; + CRYP->CR |= CRYP_CR_FFLUSH; + CRYP->CR |= CRYP_CR_CRYPEN; + t = 0; + while ((CRYP->CR & CRYP_CR_CRYPEN) != 0) { + if (++t >= STM32_BARE_AES_TIMEOUT) return WC_TIMEOUT_E; + } + return 0; +} + +/* GCM header/AAD phase (GCMPH=01). Whole blocks via DIN (no DOUT + * read); partial last block padded with zeros -- GHASH math uses + * aadSz bits in the final phase to truncate correctly. 
*/ +static int Stm32GcmAadPhase(const byte* aad, word32 aadSz, word32 cr_base) +{ + word32 b, aadBlocks, aadPartial; + int ret; + + if (aadSz == 0) return 0; + aadBlocks = aadSz / WC_AES_BLOCK_SIZE; + aadPartial = aadSz % WC_AES_BLOCK_SIZE; + + CRYP->CR = cr_base | (1u << CRYP_CR_GCM_CCMPH_Pos); + CRYP->CR |= CRYP_CR_CRYPEN; + for (b = 0; b < aadBlocks; b++) { + ret = Stm32AesXferDiscardOut(aad + b * WC_AES_BLOCK_SIZE); + if (ret != 0) return ret; + } + if (aadPartial > 0) { + byte pad[WC_AES_BLOCK_SIZE]; + XMEMSET(pad, 0, sizeof(pad)); + XMEMCPY(pad, aad + aadBlocks * WC_AES_BLOCK_SIZE, aadPartial); + ret = Stm32AesXferDiscardOut(pad); + if (ret != 0) return ret; + } + ret = Stm32AesWaitBusy(); + if (ret != 0) return ret; + CRYP->CR &= ~CRYP_CR_CRYPEN; + return 0; +} + +/* GCM payload phase (GCMPH=10). */ +static int Stm32GcmPayloadPhase(const byte* in, byte* out, word32 sz, + word32 cr_base, int isEnc) +{ + word32 b, blocks; + int ret; + + if (sz == 0) return 0; + blocks = sz / WC_AES_BLOCK_SIZE; + CRYP->CR = cr_base | (2u << CRYP_CR_GCM_CCMPH_Pos); + if (!isEnc) CRYP->CR |= CRYP_CR_ALGODIR; + CRYP->CR |= CRYP_CR_CRYPEN; + for (b = 0; b < blocks; b++) { + ret = Stm32AesXferBlock(in + b * WC_AES_BLOCK_SIZE, + out + b * WC_AES_BLOCK_SIZE); + if (ret != 0) return ret; + } + ret = Stm32AesWaitBusy(); + if (ret != 0) return ret; + CRYP->CR &= ~CRYP_CR_CRYPEN; + return 0; +} + +/* GCM final phase (GCMPH=11). Feeds 64-bit AAD-bit-len then 64-bit + * PT-bit-len, then reads 4 DOUT words for the tag. + * + * H7 rev.B+ / MP13 (CRYP_VER_2_2): DIN final-phase writes use DATATYPE + * swap normally -- write plain uint32s. + * + * F2/F4/F7 (older CRYP IP, behaves like H7 rev.A): DATATYPE swap does + * NOT apply to the final-phase length block; SW must pre-swap via + * __REV. The two HAL families disagree on this and so do their + * reference drivers -- match each. 
*/ +static int Stm32GcmFinalPhase(word32 aadSz, word32 sz, word32 cr_base, + word32* hwTag) +{ + word32 i; + int ret; + word64 aadBits = (word64)aadSz * 8u; + word64 ptBits = (word64)sz * 8u; + word32 aadBitsHi = (word32)(aadBits >> 32); + word32 aadBitsLo = (word32)aadBits; + word32 ptBitsHi = (word32)(ptBits >> 32); + word32 ptBitsLo = (word32)ptBits; + +#if defined(WOLFSSL_STM32F2) || defined(WOLFSSL_STM32F4) || \ + defined(WOLFSSL_STM32F7) + aadBitsHi = __REV(aadBitsHi); + aadBitsLo = __REV(aadBitsLo); + ptBitsHi = __REV(ptBitsHi); + ptBitsLo = __REV(ptBitsLo); +#endif + + CRYP->CR = cr_base | (3u << CRYP_CR_GCM_CCMPH_Pos); + CRYP->CR |= CRYP_CR_CRYPEN; + + ret = Stm32AesWaitInNotFull(); if (ret != 0) return ret; + CRYP->DIN = aadBitsHi; + ret = Stm32AesWaitInNotFull(); if (ret != 0) return ret; + CRYP->DIN = aadBitsLo; + ret = Stm32AesWaitInNotFull(); if (ret != 0) return ret; + CRYP->DIN = ptBitsHi; + ret = Stm32AesWaitInNotFull(); if (ret != 0) return ret; + CRYP->DIN = ptBitsLo; + + for (i = 0; i < 4; i++) { + ret = Stm32AesWaitOutNotEmpty(); + if (ret != 0) return ret; + hwTag[i] = CRYP->DOUT; + } + return 0; +} + +int wc_Stm32_Aes_Gcm(struct Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* tag, word32 tagSz, + const byte* aad, word32 aadSz, int isEnc) +{ + int ret; + word32 keyLen; + word32 cr_base; + word32 ivBuf[4]; + word32 hwTag[4]; + + if (aes == NULL || iv == NULL || tag == NULL) return BAD_FUNC_ARG; + if (sz > 0 && (in == NULL || out == NULL)) return BAD_FUNC_ARG; + if (tagSz < 4u || tagSz > WC_AES_BLOCK_SIZE) return BAD_FUNC_ARG; + if (aadSz > 0 && aad == NULL) return BAD_FUNC_ARG; /* read in AAD phase */ + /* HW only supports 12-byte IV (J0 = IV || 0x00000001 form) and whole- + * block PT (CRYP IP v1 can't natively handle a partial last block). + * AAD partial is OK -- pad with zeros; GHASH math uses aadSz bits.
*/ + if (ivSz != GCM_NONCE_MID_SZ) { + #ifdef DEBUG_STM32_BARE_GCM + printf("[STM32 BARE GCM] -> SW (ivSz=%u not 12)\n", ivSz); + #endif + return CRYPTOCB_UNAVAILABLE; + } + if (sz % WC_AES_BLOCK_SIZE != 0) { + #ifdef DEBUG_STM32_BARE_GCM + printf("[STM32 BARE GCM] -> SW (sz=%u not whole-block)\n", sz); + #endif + return CRYPTOCB_UNAVAILABLE; + } +#ifdef DEBUG_STM32_BARE_GCM + printf("[STM32 BARE GCM] -> HW (sz=%u aadSz=%u)\n", sz, aadSz); +#endif + + ret = wc_AesGetKeySize(aes, &keyLen); + if (ret != 0) return ret; + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) return ret; + WC_STM32_AES_CLK_ENABLE(); + + /* Set CR (ALGOMODE=AES-GCM, DATATYPE, KEYSIZE, phase=init) BEFORE + * loading key/IV. H7 reference HAL sets ALGOMODE first then K/IV; + * the other order on H7 produces a wrong tag even though CT comes + * out right. */ + cr_base = CRYP_CR_ALGOMODE_AES_GCM | STM32_CRYP_DATATYPE_BYTE | + Stm32AesKeySizeBits(keyLen); + CRYP->CR = cr_base | (0u << CRYP_CR_GCM_CCMPH_Pos); + + Stm32AesLoadKey(aes->key, keyLen); + + /* 12-byte IV || counter=0x00000002 (HW pre-increments to 2 for the + * first payload block; init phase sets up J0). */ + XMEMSET(ivBuf, 0, 16); + XMEMCPY(ivBuf, iv, 12); + ((byte*)ivBuf)[15] = 0x02; + ByteReverseWords(ivBuf, ivBuf, 16); + CRYP->IV0LR = ivBuf[0]; CRYP->IV0RR = ivBuf[1]; + CRYP->IV1LR = ivBuf[2]; CRYP->IV1RR = ivBuf[3]; + + ret = Stm32GcmInitPhase(); + if (ret != 0) goto exit; + ret = Stm32GcmAadPhase(aad, aadSz, cr_base); + if (ret != 0) goto exit; + ret = Stm32GcmPayloadPhase(in, out, sz, cr_base, isEnc); + if (ret != 0) goto exit; + ret = Stm32GcmFinalPhase(aadSz, sz, cr_base, hwTag); + if (ret != 0) goto exit; + XMEMCPY(tag, hwTag, tagSz < 16 ?
tagSz : 16); + +exit: + CRYP->CR &= ~CRYP_CR_CRYPEN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +#else /* TinyAES IP (L4/L5/U5/H5/H573/G0/G4/WB/WL/WBA) */ + +/* ----- TinyAES IP (single-register, polled) ----- + * Different from CRYP: no FIFO; one DINR / DOUTR pair processed per + * 16-byte block. KEYRx are written in *reversed* word order + * (KEYR3 = MSB key word for 128-bit; KEYR7 = MSB for 256-bit). + * AES-192 not supported by hardware (only 128 and 256). */ + +#ifndef STM32_BARE_AES_TIMEOUT + #define STM32_BARE_AES_TIMEOUT 0x10000 +#endif + +/* CCF (computation-complete) wait/clear, parameterized on the AES + * instance so the same helpers drive both AES and SAES (DHUK) -- on + * chips that have both, the IP layout is identical. + * + * Clear: newer IPs (U3/U5/L4/L5/H5/G4/WBA/C5) use AES_ICR; older WB/WL/ + * G0 use AES_CR.CCFC; U0 has ICR but it only clears ISR.CCF (we poll + * SR.CCF on U0), so U0 also uses CR.CCFC. Trailing __DMB() prevents the + * C5-at-PLL race where the next CCF poll catches an in-flight clear. + * + * Wait: C5 polls AES_ISR.CCF; older TinyAES polls AES_SR.CCF; U0 polls + * SR.CCF (its ISR.CCF only asserts when the matching IER bit is on).
*/ +#if defined(WOLFSSL_STM32U0) && defined(AES_CR_CCFC) + #define STM32_AES_CLEAR_INST(inst) do { \ + (inst)->CR |= AES_CR_CCFC; __DMB(); } while (0) +#elif defined(AES_ICR_CCF) + #define STM32_AES_CLEAR_INST(inst) do { \ + (inst)->ICR = AES_ICR_CCF; __DMB(); } while (0) +#elif defined(AES_CR_CCFC) + #define STM32_AES_CLEAR_INST(inst) do { \ + (inst)->CR |= AES_CR_CCFC; __DMB(); } while (0) +#else + #error "STM32 AES IP variant: no CCF-clear mechanism known" +#endif + +#if defined(WOLFSSL_STM32U0) && defined(AES_SR_CCF) + #define STM32_AES_CCF_BIT AES_SR_CCF + #define STM32_AES_CCF_REG SR +#elif defined(AES_ISR_CCF) + #define STM32_AES_CCF_BIT AES_ISR_CCF + #define STM32_AES_CCF_REG ISR +#elif defined(AES_SR_CCF) + #define STM32_AES_CCF_BIT AES_SR_CCF + #define STM32_AES_CCF_REG SR +#else + #error "STM32 AES IP variant: no CCF status register known" +#endif + +/* Back-compat alias for the unparameterized regular-AES call sites. */ +#define STM32_AES_CLEAR_CCF() STM32_AES_CLEAR_INST(WC_STM32_AES_INST) + +#define STM32_AES_DATATYPE_BYTE AES_CR_DATATYPE_1 /* 0b10 */ +#define STM32_AES_CHMOD_ECB 0u +#define STM32_AES_CHMOD_CBC AES_CR_CHMOD_0 +#define STM32_AES_CHMOD_CTR AES_CR_CHMOD_1 +#define STM32_AES_CHMOD_GCM (AES_CR_CHMOD_0 | AES_CR_CHMOD_1) +#define STM32_AES_MODE_ENC 0u +#define STM32_AES_MODE_KEYDERIVE AES_CR_MODE_0 +#define STM32_AES_MODE_DEC AES_CR_MODE_1 +#define STM32_AES_MODE_KD_DEC (AES_CR_MODE_0 | AES_CR_MODE_1) + +/* Poll CCF on either AES instance (regular or SAES). Force prior + * config / DINR writes to retire before polling -- required on the C5 + * family at PLL clock, where without the barrier the write buffer can + * defer the last DINR write past the first CCF read, latching us on a + * stale (still-zero) status. 
*/ +static int Stm32AesPollCCF(AES_TypeDef* inst, int timeout) +{ + int t = 0; + __DMB(); + while ((inst->STM32_AES_CCF_REG & STM32_AES_CCF_BIT) == 0) { + if (++t >= timeout) { + #if defined(DEBUG_STM32_BARE_GCM) || defined(WC_STM32_SAES_DIAG) + printf("[STM32 BARE AES] CCF timeout: CCFreg=0x%08lx CR=0x%08lx " + "ISR=0x%08lx SR=0x%08lx\n", + (unsigned long)(inst->STM32_AES_CCF_REG), + (unsigned long)inst->CR, + (unsigned long)inst->ISR, + (unsigned long)inst->SR); + #endif + return WC_TIMEOUT_E; + } + } + return 0; +} + +/* Back-compat wrapper for the regular-AES (`WC_STM32_AES_INST`) call sites. + * DHUK / SAES call Stm32AesPollCCF(SAES, STM32_BARE_SAES_TIMEOUT) directly. */ +static int Stm32AesWaitCCF(void) +{ + return Stm32AesPollCCF(WC_STM32_AES_INST, STM32_BARE_AES_TIMEOUT); +} + +static word32 Stm32AesKeySizeBits(word32 keyLen) +{ + if (keyLen == 32) { + return AES_CR_KEYSIZE; /* 256-bit */ + } + return 0; /* 128-bit (192 not supported by HW) */ +} + +/* Load a pre-byte-reversed AES key into KEYR0..KEYR(N-1) of `inst`. + * KEYR(N-1) holds the high word; KEYR0 must be written first per RM. + * 16-byte (AES-128) and 32-byte (AES-256) keys only -- TinyAES HW does + * not support AES-192. */ +static int Stm32AesLoadKeyInst(AES_TypeDef* inst, const word32* key, + word32 keyLen) +{ + if (keyLen == 16) { + inst->KEYR0 = key[3]; inst->KEYR1 = key[2]; + inst->KEYR2 = key[1]; inst->KEYR3 = key[0]; + return 0; + } + if (keyLen == 32) { + inst->KEYR0 = key[7]; inst->KEYR1 = key[6]; + inst->KEYR2 = key[5]; inst->KEYR3 = key[4]; + inst->KEYR4 = key[3]; inst->KEYR5 = key[2]; + inst->KEYR6 = key[1]; inst->KEYR7 = key[0]; + return 0; + } + return BAD_FUNC_ARG; +} + +static int Stm32AesLoadKey(const word32* key, word32 keyLen) +{ + return Stm32AesLoadKeyInst(WC_STM32_AES_INST, key, keyLen); +} + +static void Stm32AesLoadIV(const byte* iv, word32 ivLen) +{ + word32 v[4]; + word32 copyLen = (ivLen > 16) ? 
16 : ivLen; + + XMEMSET(v, 0, sizeof(v)); + if (iv != NULL && copyLen > 0) { + XMEMCPY(v, iv, copyLen); + ByteReverseWords(v, v, 16); + } + /* IVRx ordering matches the key words: IVR3 = MSB */ + WC_STM32_AES_INST->IVR3 = v[0]; + WC_STM32_AES_INST->IVR2 = v[1]; + WC_STM32_AES_INST->IVR1 = v[2]; + WC_STM32_AES_INST->IVR0 = v[3]; +} + +/* One 16-byte block in / out. */ +static int Stm32AesXferBlock(const byte* in, byte* out) +{ + int ret; + word32 i; + word32 buf[WC_AES_BLOCK_SIZE/sizeof(word32)]; + + XMEMCPY(buf, in, WC_AES_BLOCK_SIZE); + for (i = 0; i < 4; i++) { + WC_STM32_AES_INST->DINR = buf[i]; + } + ret = Stm32AesWaitCCF(); + if (ret != 0) { + return ret; + } + for (i = 0; i < 4; i++) { + buf[i] = WC_STM32_AES_INST->DOUTR; + } + XMEMCPY(out, buf, WC_AES_BLOCK_SIZE); + /* Clear CCF for next block */ + STM32_AES_CLEAR_CCF(); + return 0; +} + +/* Decrypt key-derivation pass. NOTE(review): SetupCR inlines this; unused? */ +static int Stm32AesPrepareKey(word32 keyLen, word32 chmod) +{ + int ret; + word32 cr = STM32_AES_MODE_KEYDERIVE | STM32_AES_DATATYPE_BYTE | + Stm32AesKeySizeBits(keyLen) | chmod; + WC_STM32_AES_INST->CR = cr; + WC_STM32_AES_INST->CR |= AES_CR_EN; + ret = Stm32AesWaitCCF(); + STM32_AES_CLEAR_CCF(); + WC_STM32_AES_INST->CR &= ~AES_CR_EN; + return ret; +} + +/* Forward decls for the SAES self-init helpers defined further down + * (inside the WOLFSSL_DHUK || WOLFSSL_STM32_USE_SAES block). Needed + * because the TinyAES ECB/CBC entry points have to drive the SAES + * init dance before the first CR write when routed via SAES. */ +#ifdef WOLFSSL_STM32_USE_SAES +static int Stm32SaesWaitInit(void); +static void Stm32SaesEnsureRng(void); +#endif + +/* Shared setup for TinyAES Ecb/Cbc: clock enable, SAES self-init + * (when routed), CR=0 / config program, key load, decrypt key- + * derivation pass. Caller follows up with the IV (Cbc) and the + * single-write CR | EN to start the data path.
+ * + * SAES quirk: KEYSIZE/MODE/CHMOD are only writable when EN=0 AND + * BUSY=0, so a Stm32SaesWaitInit() drain is inserted after every + * write that can leave BUSY set (cold-enable, CR=0 reset, KEYR + * load). */ +static int Stm32AesSetupCR(struct Aes* aes, int isEnc, word32 chmod, + word32 keyLen, word32* outCr) +{ + int ret; + word32 cr = STM32_AES_DATATYPE_BYTE | Stm32AesKeySizeBits(keyLen) | + chmod | + (isEnc ? STM32_AES_MODE_ENC : STM32_AES_MODE_DEC); + + WC_STM32_AES_CLK_ENABLE_INST(); +#ifdef WOLFSSL_STM32_USE_SAES + Stm32SaesEnsureRng(); + ret = Stm32SaesWaitInit(); + if (ret != 0) return ret; +#endif + + WC_STM32_AES_INST->CR = 0; +#ifdef WOLFSSL_STM32_USE_SAES + ret = Stm32SaesWaitInit(); + if (ret != 0) return ret; +#endif + + WC_STM32_AES_INST->CR = cr; + STM32_AES_CLEAR_CCF(); + + ret = Stm32AesLoadKey(aes->key, keyLen); + if (ret != 0) return ret; +#ifdef WOLFSSL_STM32_USE_SAES + ret = Stm32SaesWaitInit(); + if (ret != 0) return ret; +#endif + + if (!isEnc) { + WC_STM32_AES_INST->CR = ((cr & ~AES_CR_MODE_Msk) | + STM32_AES_MODE_KEYDERIVE); + WC_STM32_AES_INST->CR |= AES_CR_EN; + ret = Stm32AesWaitCCF(); + STM32_AES_CLEAR_CCF(); + WC_STM32_AES_INST->CR &= ~AES_CR_EN; + if (ret != 0) return ret; + WC_STM32_AES_INST->CR = cr; + } + *outCr = cr; + return 0; +} + +/* Feed each whole 16-byte block of sz to Stm32AesXferBlock. Callers enable + * with one CR|EN write: RMW may lose KEYSIZE/MODE/CHMOD on SAES if BUSY.
*/ +static int Stm32AesBlockLoop(const byte* in, byte* out, word32 sz) +{ + word32 blocks = sz / WC_AES_BLOCK_SIZE; + word32 b; + int ret = 0; + for (b = 0; b < blocks; b++) { + ret = Stm32AesXferBlock(in + b * WC_AES_BLOCK_SIZE, + out + b * WC_AES_BLOCK_SIZE); + if (ret != 0) break; + } + return ret; +} + +int wc_Stm32_Aes_Ecb(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc) +{ + int ret; + word32 keyLen, cr; + + if (aes == NULL || out == NULL || in == NULL) return BAD_FUNC_ARG; + if (sz == 0 || (sz % WC_AES_BLOCK_SIZE) != 0) return BAD_FUNC_ARG; + ret = wc_AesGetKeySize(aes, &keyLen); + if (ret != 0) return ret; + if (keyLen != 16 && keyLen != 32) return BAD_FUNC_ARG; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) return ret; + + ret = Stm32AesSetupCR(aes, isEnc, STM32_AES_CHMOD_ECB, keyLen, &cr); + if (ret != 0) goto exit; + + WC_STM32_AES_INST->CR = cr | AES_CR_EN; + ret = Stm32AesBlockLoop(in, out, sz); + +exit: + WC_STM32_AES_INST->CR &= ~AES_CR_EN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +int wc_Stm32_Aes_Cbc(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc) +{ + int ret; + word32 keyLen, cr, blocks; + + if (aes == NULL || out == NULL || in == NULL) return BAD_FUNC_ARG; + /* Match the CRYP backend + SW/CUBEMX: process whole blocks and ignore any + * sub-block remainder (the wc_AesCbcEncrypt/Decrypt wrappers reject a + * non-block-multiple only under WOLFSSL_AES_CBC_LENGTH_CHECKS).
*/ + blocks = sz / WC_AES_BLOCK_SIZE; + if (blocks == 0) return 0; + sz = blocks * WC_AES_BLOCK_SIZE; + ret = wc_AesGetKeySize(aes, &keyLen); + if (ret != 0) return ret; + if (keyLen != 16 && keyLen != 32) return BAD_FUNC_ARG; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) return ret; + + ret = Stm32AesSetupCR(aes, isEnc, STM32_AES_CHMOD_CBC, keyLen, &cr); + if (ret != 0) goto exit; + + Stm32AesLoadIV((const byte*)aes->reg, WC_AES_BLOCK_SIZE); + WC_STM32_AES_INST->CR = cr | AES_CR_EN; + + /* In-place decrypt overwrites the last ciphertext block, so capture + * it for the next IV before the block loop. */ + if (!isEnc) { + XMEMCPY(aes->tmp, in + sz - WC_AES_BLOCK_SIZE, WC_AES_BLOCK_SIZE); + } + ret = Stm32AesBlockLoop(in, out, sz); + if (ret == 0) { + blocks = sz / WC_AES_BLOCK_SIZE; + if (isEnc) { + XMEMCPY(aes->reg, out + (blocks - 1) * WC_AES_BLOCK_SIZE, + WC_AES_BLOCK_SIZE); + } + else { + XMEMCPY(aes->reg, aes->tmp, WC_AES_BLOCK_SIZE); + } + } + +exit: + WC_STM32_AES_INST->CR &= ~AES_CR_EN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +/* TinyAES HW GCM: deferred. Falls back to software GCM (with HW ECB + * blocks via wc_AesEncrypt -> wc_Stm32_Aes_Ecb). */ +int wc_Stm32_Aes_Gcm(struct Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* tag, word32 tagSz, + const byte* aad, word32 aadSz, int isEnc) +{ + (void)aes; (void)out; (void)in; (void)sz; + (void)iv; (void)ivSz; + (void)tag; (void)tagSz; + (void)aad; (void)aadSz; (void)isEnc; + return CRYPTOCB_UNAVAILABLE; +} + +#endif /* CRYP IP vs TinyAES IP */ + + +#if defined(WOLFSSL_DHUK) || defined(WOLFSSL_STM32_USE_SAES) +/* ----- BARE SAES helpers (shared by DHUK and the TinyAES SAES route) + * Direct-register SAES self-init / RNG enable used by both the DHUK + * wrap/unwrap path and the TinyAES BARE path when routed to SAES via + * WOLFSSL_STM32_USE_SAES. 
+ * + * SAES on H5/U3/U5/WBA/C5/N6 fetches random data from the RNG on the + * first clock-enable; SR.BUSY stays set until that init completes and + * the IP silently rejects any CR/KEYR/IVR writes during that window. + * The regular AES IP has no such dance, so the TinyAES path that + * targets WC_STM32_AES_INST = CRYP doesn't need these helpers, but the + * SAES routing does. */ + +#ifndef SAES + #error "WOLFSSL_DHUK / WOLFSSL_STM32_USE_SAES require SAES symbol from \ + CMSIS device header" +#endif + +#ifndef STM32_BARE_SAES_TIMEOUT + #define STM32_BARE_SAES_TIMEOUT 0x10000 +#endif + +/* SAES self-init: the IP fetches random data from the RNG on first + * clock-enable. SR.BUSY stays set until init completes. SAES rejects + * config writes during this window. Must be called once after + * WC_STM32_SAES_CLK_ENABLE() before touching CR / KEYR / DINR. */ +static int Stm32SaesWaitInit(void) +{ + int t = 0; + __DMB(); + while ((SAES->SR & AES_SR_BUSY) != 0U) { + if (++t >= STM32_BARE_SAES_TIMEOUT) { + return WC_TIMEOUT_E; + } + } return 0; } +/* Ensure the RNG IP is producing data. SAES init pulls from the + * RNG, so RNGEN must be set before the SAES clock-enable triggers + * SAES self-init. wc_GenerateSeed sets RNGEN on its first call, + * but DHUK / the SAES TinyAES route may run before any RNG consumer. 
*/ +static void Stm32SaesEnsureRng(void) +{ +#ifdef WC_STM32_RNG_CLK_ENABLE + WC_STM32_RNG_CLK_ENABLE(); +#endif + if ((RNG->CR & RNG_CR_RNGEN) == 0U) { + RNG->CR |= RNG_CR_RNGEN; + __DMB(); + } +} + +#endif /* WOLFSSL_DHUK || WOLFSSL_STM32_USE_SAES */ + + + +#elif defined(WOLFSSL_STM32_CUBEMX) + +#if defined(WOLFSSL_DHUK) /* Wrap an AES key using the DHUK */ int wc_Stm32_Aes_Wrap(struct Aes* aes, const byte* in, word32 inSz, byte* out, word32* outSz, const byte* iv, int ivSz) @@ -702,40 +2393,6 @@ int wc_Stm32_Aes_Wrap(struct Aes* aes, const byte* in, word32 inSz, byte* out, } -int wc_Stm32_Aes_UnWrap(struct Aes* aes, CRYP_HandleTypeDef* hcryp, - const byte* in, word32 inSz, const byte* iv, int ivSz) -{ - int ret = 0; - - /* SAES requires use of the RNG -- HAL_RNG_DeInit() calls from random.c - turn off the RNG clock -- re-enable the clock here */ - __HAL_RCC_RNG_CLK_ENABLE(); - - /* setup for key unwrapping */ - XMEMSET(hcryp, 0, sizeof(CRYP_HandleTypeDef)); - hcryp->Instance = SAES; - hcryp->Init.DataType = CRYP_DATATYPE_8B; - hcryp->Init.KeySize = CRYP_KEYSIZE_256B; - hcryp->Init.DataWidthUnit = CRYP_DATAWIDTHUNIT_BYTE; - if (ivSz > 0 && iv != NULL) { - hcryp->Init.pInitVect = (uint32_t *)iv; - hcryp->Init.Algorithm = CRYP_AES_CBC; - } - else { - hcryp->Init.Algorithm = CRYP_AES_ECB; - } - hcryp->Init.KeyIVConfigSkip = CRYP_KEYIVCONFIG_ALWAYS; - hcryp->Init.KeySelect = CRYP_KEYSEL_HW; /* use DHUK to unwrap with use */ - hcryp->Init.KeyMode = CRYP_KEYMODE_WRAPPED; - ret = HAL_CRYP_Init(hcryp); - if (ret == HAL_OK) { - /* On success the key is placed into a location where the next encrypt/decrypt - * calls using hcryp make use of the key */ - ret = HAL_CRYPEx_UnwrapKey(hcryp, (uint32_t*)in, 100); - } - return ret; -} - #endif int wc_Stm32_Aes_Init(Aes* aes, CRYP_HandleTypeDef* hcryp, int useSaes) @@ -768,10 +2425,10 @@ int wc_Stm32_Aes_Init(Aes* aes, CRYP_HandleTypeDef* hcryp, int useSaes) break; } -#ifdef WOLFSSL_STM32U5_DHUK +#ifdef WOLFSSL_DHUK /* Use hardware 
key */ - if (useSaes && (aes->devId == WOLFSSL_STM32U5_DHUK_DEVID || - aes->devId == WOLFSSL_STM32U5_SAES_DEVID)) { + if (useSaes && (aes->devId == WOLFSSL_DHUK_DEVID || + aes->devId == WOLFSSL_SAES_DEVID)) { /* SAES requires use of the RNG -- HAL_RNG_DeInit() calls from random.c turn off the RNG clock -- re-enable the clock here */ @@ -781,7 +2438,7 @@ int wc_Stm32_Aes_Init(Aes* aes, CRYP_HandleTypeDef* hcryp, int useSaes) hcryp->Init.DataType = CRYP_DATATYPE_8B; /* Key select (HW, or Normal) */ - if (aes->devId == WOLFSSL_STM32U5_DHUK_DEVID) { + if (aes->devId == WOLFSSL_DHUK_DEVID) { hcryp->Init.KeySelect = CRYP_KEYSEL_HW; } else { @@ -878,7 +2535,7 @@ int wc_Stm32_Aes_Init(Aes* aes, CRYP_InitTypeDef* cryptInit, void wc_Stm32_Aes_Cleanup(void) { } -#endif /* WOLFSSL_STM32_CUBEMX */ +#endif /* WOLFSSL_STM32_BARE / WOLFSSL_STM32_CUBEMX / StdPeriph */ #endif /* !NO_AES */ #endif /* STM32_CRYPTO */ @@ -939,7 +2596,15 @@ static int stm32_getabs_from_mp_int(uint8_t *dst, const mp_int *a, int sz, defined(WOLFSSL_SP_INT_NEGATIVE)) *abs_sign = a->sign; #else - *abs_sign = 1; /* default to negative */ + /* See companion comment in stm32_getabs_from_hexstr. sp_int + * without WOLFSSL_SP_INT_NEGATIVE has no sign field; the mp_int + * is the modular representative of `a` (e.g. P-256 Af = p-3, + * a large positive integer). Default to POSITIVE so PKA reads + * coef + sign self-consistently. Was incorrectly 1 (negative) + * which made the V2 PKA ECCMul compute on a wrong curve and + * hang/error; also caused the V1 PKA ECDSA sign+verify + * roundtrip to fail on WL55. 
*/ + *abs_sign = 0; /* positive */ #endif res = mp_abs((mp_int*)a, &x); if (res == MP_OKAY) @@ -969,7 +2634,18 @@ static int stm32_getabs_from_hexstr(const char* hex, uint8_t* dst, int sz, defined(WOLFSSL_SP_INT_NEGATIVE)) *abs_sign = x.sign; #else - *abs_sign = 1; /* default to negative */ + /* sp_int without WOLFSSL_SP_INT_NEGATIVE has no sign field; + * mp_read_radix returns the absolute value as a positive + * integer. The wolfssl ECC table stores the coefficient `a` + * as its modular representative (e.g. P-256 Af = p-3, a + * large positive number), so the sign here is POSITIVE + * (a = +(p-3) which mod p equals -3 -- mathematically the + * same as -3 with coefSign = negative, but the PKA expects + * coef + coefSign to be self-consistent). Defaulting to 1 + * (negative) caused the PKA to compute on curve a=+3 + * instead of a=-3, producing R/S that don't verify against + * the SW-generated pubkey. */ + *abs_sign = 0; /* positive */ #endif res = mp_abs(&x, &x); } @@ -985,7 +2661,9 @@ static int stm32_get_from_hexstr(const char* hex, uint8_t* dst, int sz) } /* STM32 PKA supports up to 640-bit numbers */ +#ifndef STM32_MAX_ECC_SIZE #define STM32_MAX_ECC_SIZE (80) +#endif #ifdef WOLFSSL_STM32_PKA_V2 /* find curve based on prime/modulus and return order/coefB */ @@ -1036,6 +2714,11 @@ static int stm32_get_curve_params(mp_int* modulus, return MP_OKAY on success */ +/* The STM32H563 "light" PKA has no generic ECC scalar-mul mode -- only the + * integrated ECDSA verify (mode 0x26). Under WC_STM32_PKA_VERIFY_ONLY, + * keygen and (software) sign use the C ecc_mulmod from ecc.c, and only + * stm32_ecc_verify_hash_ex below routes to the HW PKA. 
*/ +#if !defined(WC_STM32_PKA_VERIFY_ONLY) int wc_ecc_mulmod_ex2(const mp_int* k, ecc_point *G, ecc_point *R, mp_int* a, mp_int* modulus, mp_int* o, WC_RNG* rng, int map, void* heap) @@ -1162,6 +2845,7 @@ int ecc_map_ex(ecc_point* P, mp_int* modulus, mp_digit mp, int ct) (void)ct; return MP_OKAY; } +#endif /* !WC_STM32_PKA_VERIFY_ONLY */ int stm32_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash, word32 hashlen, int* res, ecc_key* key) @@ -1211,7 +2895,8 @@ int stm32_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash, if (status == MP_OKAY) status = stm32_get_from_hexstr(key->dp->Gy, gen_y, size); if (status == MP_OKAY) - status = stm32_getabs_from_hexstr(key->dp->Af, coefA, size, &coefA_sign); + status = stm32_getabs_from_hexstr(key->dp->Af, coefA, size, + &coefA_sign); if (status != MP_OKAY) return status; @@ -1297,7 +2982,8 @@ int stm32_ecc_sign_hash_ex(const byte* hash, word32 hashlen, WC_RNG* rng, if (status == MP_OKAY) status = stm32_get_from_hexstr(key->dp->Gy, gen_y, size); if (status == MP_OKAY) - status = stm32_getabs_from_hexstr(key->dp->Af, coefA, size, &coefA_sign); + status = stm32_getabs_from_hexstr(key->dp->Af, coefA, size, + &coefA_sign); #ifdef WOLFSSL_STM32_PKA_V2 if (status == MP_OKAY) status = stm32_get_from_hexstr(key->dp->Bf, coefB, size); @@ -1381,5 +3067,6 @@ int stm32_ecc_sign_hash_ex(const byte* hash, word32 hashlen, WC_RNG* rng, return status; } + #endif /* HAVE_ECC */ #endif /* WOLFSSL_STM32_PKA */ diff --git a/wolfcrypt/src/random.c b/wolfcrypt/src/random.c index 609e7fb6b45..de5a72ffb22 100644 --- a/wolfcrypt/src/random.c +++ b/wolfcrypt/src/random.c @@ -4253,6 +4253,14 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) #elif defined(STM32_RNG) /* Generate a RNG seed using the hardware random number generator * on the STM32F2/F4/F7/L4. 
*/ + #include + /* Pulls in WC_STM32_RNG_CLK_ENABLE for WOLFSSL_STM32_BARE builds */ + #ifdef WC_STM32_RNG_DIAG + /* The WC_STM32_RNG_DIAG paths below use printf(); pull in stdio.h so the + * file compiles on strict C99+ toolchains when diagnostics are enabled. */ + #include + #endif + #ifdef WOLFSSL_STM32_CUBEMX int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) @@ -4321,6 +4329,36 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) #endif + /* Bounded poll for DRDY, plus recovery from SECS / CECS. The + * unbounded `while (DRDY == 0)` loop in the original code spins + * forever on chips where the RNG kernel clock is unstable + * (e.g. WL55 with RNGSEL = MSI under sustained ECDSA-key-gen + * load), because once the IP latches a Seed-error or Clock- + * error condition it stops asserting DRDY. Per the STM32 RM + * recovery sequence: clear SEIS/CEIS, toggle RNGEN, discard the + * stale words sitting in the RNG output, then retry. */ + #ifndef STM32_BARE_RNG_BYTE_TIMEOUT + #define STM32_BARE_RNG_BYTE_TIMEOUT 0x40000 + #endif + #ifndef STM32_BARE_RNG_MAX_RETRIES + #define STM32_BARE_RNG_MAX_RETRIES 8 + #endif + + /* Recover-and-retry on a latched SECS/CECS is the behavior the new direct- + * register BARE port needs (e.g. WL55 with RNGSEL = MSI under sustained + * key-gen load) and the STM32C5 NIST RNG needs on first-init. The + * pre-existing direct-register users -- classic WOLFSSL_STM32F427_RNG, + * WOLFSSL_STM32_RNG_NOLIB and STM32_NUTTX_RNG -- keep the HISTORICAL + * immediate SECS/CECS fast-fail by default, so this change does not alter + * their behavior. Define WC_STM32_RNG_RECOVER to opt a classic family into + * the recovery path, or WOLFSSL_STM32_RNG_LEGACY_FAILFAST to force the + * fast-fail even on the new families. 
*/ + #if !defined(WC_STM32_RNG_RECOVER) && \ + !defined(WOLFSSL_STM32_RNG_LEGACY_FAILFAST) && \ + (defined(WOLFSSL_STM32_BARE) || defined(RNG_CAND_NIST_CR_VALUE)) + #define WC_STM32_RNG_RECOVER + #endif + /* Generate a RNG seed using the hardware RNG on the STM32F427 * directly, following steps outlined in STM32F4 Reference * Manual (Chapter 24) for STM32F4xx family. */ @@ -4328,6 +4366,10 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) { int ret; word32 i; + word32 t; + word32 guard; + word32 retries; + word32 sr; (void)os; ret = wolfSSL_CryptHwMutexLock(); @@ -4337,29 +4379,204 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) #ifndef STM32_NUTTX_RNG /* enable RNG peripheral clock */ - RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; + #ifdef WC_STM32_RNG_CLK_ENABLE + WC_STM32_RNG_CLK_ENABLE(); + #else + /* Default for F4/F7/L4/L5/U5/H5/H7 -- RNG on AHB2 */ + RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; + #endif + #endif + + /* On the new-gen STM32C5 RNG IP the CR register is locked at + * reset (CONFIGLOCK clear, but the IP refuses to produce data + * until a NIST-compliant CONFIG1/2/3 + NSCR + HTCR sequence + * has been written under CONDRST). The HAL ships canonical + * candidate values in the device header (RNG_CAND_NIST_*). + * Detect the family by presence of that symbol -- on chips + * without it (F4/F7/L4/U5/H7/H5/WL/etc.) skip. Do this only + * on the first call (RNGEN clear) so subsequent calls don't + * disturb a running peripheral. */ + #if defined(RNG_CAND_NIST_CR_VALUE) && defined(RNG_CR_CONDRST) && \ + !defined(WC_STM32_RNG_NO_NIST_INIT) + if ((WC_RNG_CR & RNG_CR_RNGEN) == 0U) { + #ifdef RNG_SR_BUSY + /* HAL flow: drain BUSY before writing CR. 
*/ + t = 0; + while ((WC_RNG_SR & RNG_SR_BUSY) != 0U) { + if (++t >= STM32_BARE_RNG_BYTE_TIMEOUT) { + break; + } + } + #endif + WC_RNG_CR = (uint32_t)RNG_CAND_NIST_CR_VALUE | + (uint32_t)RNG_CR_CONDRST; + #ifdef RNG_CAND_NIST_NSCR_VALUE + RNG->NSCR = (uint32_t)RNG_CAND_NIST_NSCR_VALUE; + #endif + #ifdef RNG_CAND_NIST_HTCR_VALUE + RNG->HTCR[0] = (uint32_t)RNG_CAND_NIST_HTCR_VALUE; + #endif + /* Clear CONDRST and wait for the IP to mirror it back. The + * STM32 HAL polls RNG_CR.CONDRST (not SR.BUSY) for completion + * of the conditioning soft-reset; SR.BUSY drops earlier in + * the seed-pull pipeline on at least the C5 IP and reading + * it as "conditioning done" trips a SECS=1 a few microseconds + * later when RNGEN goes high. Bounded so a misconfigured + * kernel clock returns a clean error instead of hanging. */ + WC_RNG_CR &= ~(uint32_t)RNG_CR_CONDRST; + t = 0; + while ((WC_RNG_CR & RNG_CR_CONDRST) != 0U) { + if (++t >= STM32_BARE_RNG_BYTE_TIMEOUT) { +#ifdef WC_STM32_RNG_DIAG + printf("[RNG] CONDRST stuck CR=%08lx SR=%08lx\n", + (unsigned long)WC_RNG_CR, + (unsigned long)WC_RNG_SR); +#endif + wolfSSL_CryptHwMutexUnLock(); + return RNG_FAILURE_E; + } + } +#ifdef WC_STM32_RNG_DIAG + printf("[RNG] post-NIST CR=%08lx SR=%08lx t=%lu\n", + (unsigned long)WC_RNG_CR, + (unsigned long)WC_RNG_SR, + (unsigned long)t); +#endif + } #endif /* enable RNG interrupt, set IE bit in RNG->CR register */ WC_RNG_CR |= RNG_CR_IE; /* enable RNG, set RNGEN bit in RNG->CR. Activates RNG, - * RNG_LFSR, and error detector */ + * RNG_LFSR, and error detector. WC_STM32_RNG_CED_DISABLE + * additionally sets CR.CED=1 to suppress the clock-error + * detection -- the Linux STM32 RNG driver does this and on + * the C5 silicon the CED detector trips on a (perfectly fine) + * 48 MHz kernel clock for reasons unclear in the RM. 
*/ +#ifdef WC_STM32_RNG_CED_DISABLE + WC_RNG_CR |= RNG_CR_RNGEN | RNG_CR_CED; +#else WC_RNG_CR |= RNG_CR_RNGEN; +#endif - /* verify no errors, make sure SEIS and CEIS bits are 0 - * in RNG->SR register */ - if (WC_RNG_SR & (RNG_SR_SECS | RNG_SR_CECS)) { +#ifndef WC_STM32_RNG_RECOVER + /* Default for the classic direct-register families + * (WOLFSSL_STM32F427_RNG / WOLFSSL_STM32_RNG_NOLIB / STM32_NUTTX_RNG): + * the historical immediate SECS/CECS fast-fail. The recover-and-retry + * path (below) is enabled only for the new families that need it + * (WC_STM32_RNG_RECOVER), where bailing here returned a spurious -199 + * on first-init (e.g. STM32C5) or on a transient clock glitch. */ + if ((WC_RNG_SR & (RNG_SR_SECS | RNG_SR_CECS)) != 0U) { wolfSSL_CryptHwMutexUnLock(); return RNG_FAILURE_E; } +#endif + + /* (No early SECS/CECS bail here when WC_STM32_RNG_RECOVER is defined; + * see above.) The HAL doesn't check error status immediately + * after RNGEN -- the IP needs a few cycles after enable for the first + * seed pull, and a transient SEIS/SECS can latch and resolve itself + * through the auto-reset that the retry loop below already handles. + * Bailing here returned RNG_FAILURE_E (-199) on first-init on the + * STM32C5 silicon. NOTE: this branch serves all direct-register STM32 + * RNG users (WOLFSSL_STM32F427_RNG / WOLFSSL_STM32_RNG_NOLIB / + * STM32_NUTTX_RNG), not only the BARE/C5 port, but recovery from a + * latched SECS/CECS applies only when WC_STM32_RNG_RECOVER is defined; + * classic families keep the fast-fail on error. Either way the loop + * still returns RNG_FAILURE_E after the retry budget. */ for (i = 0; i < sz; i++) { - /* wait until RNG number is ready */ - while ((WC_RNG_SR & RNG_SR_DRDY) == 0) { } + retries = 0; + for (;;) { + t = 0; + /* Sample SR once before the loop so the post-loop + * happy-path / error checks have a defined value even + * if STM32_BARE_RNG_BYTE_TIMEOUT is configured to 0. 
*/ + sr = WC_RNG_SR; + /* Bounded DRDY poll -- breaks on either DRDY or any + * error indication (SECS/CECS). */ + while (t < STM32_BARE_RNG_BYTE_TIMEOUT) { + sr = WC_RNG_SR; + if ((sr & (RNG_SR_DRDY | RNG_SR_SECS | + RNG_SR_CECS)) != 0U) { + break; + } + t++; + } - /* get value */ - output[i] = WC_RNG_DR; + /* Happy path: data ready and no error. */ + if ((sr & RNG_SR_DRDY) != 0U && + (sr & (RNG_SR_SECS | RNG_SR_CECS)) == 0U) { + output[i] = WC_RNG_DR; + break; + } + +#ifndef WC_STM32_RNG_RECOVER + /* Classic-family default: a latched seed/clock error is a hard + * failure, not something to recover-and-retry (preserves the + * historical F427 / NOLIB / NuttX fast-fail). A plain timeout + * (no error bit) still falls through to the bounded retry. */ + if ((sr & (RNG_SR_SECS | RNG_SR_CECS)) != 0U) { + wolfSSL_CryptHwMutexUnLock(); + return RNG_FAILURE_E; + } +#endif + + /* Either timed out or an error latched. Recover. */ + if (++retries > STM32_BARE_RNG_MAX_RETRIES) { +#ifdef WC_STM32_RNG_DIAG + printf("[RNG] retry max byte=%lu sr=%08lx CR=%08lx\n", + (unsigned long)i, + (unsigned long)sr, + (unsigned long)WC_RNG_CR); +#endif + wolfSSL_CryptHwMutexUnLock(); + return RNG_FAILURE_E; + } +#ifdef WC_STM32_RNG_DIAG + printf("[RNG] retry byte=%lu retries=%lu sr=%08lx\n", + (unsigned long)i, + (unsigned long)retries, + (unsigned long)sr); +#endif + + /* Recovery sequence (per STM32 RM RNG chapter): + * 1. Clear SEIS / CEIS interrupt status by writing 0 + * to those bits. All other SR bits are read-only + * status indicators (DRDY / BUSY / SECS / CECS); + * writing 0 to them has no effect per the RM, so + * a plain 0 write is safe and avoids the + * read-modify-write hitting any reserved bits the + * IP revision may add later. + * 2. Toggle RNGEN off then on to drop any stale + * LFSR state that may be tainted by the error. + * 3. Discard four DR reads to flush the pipeline + * (only meaningful when DRDY is set; otherwise + * the reads are harmless and bounded). 
The 'guard' + * counter bounds the loop independently of 't' so a + * marginal kernel clock (DRDY stays 0, no error + * latched) cannot spin forever -- it falls through + * to the outer retry/backoff instead of hanging. */ + WC_RNG_SR = 0; + WC_RNG_CR &= ~RNG_CR_RNGEN; + WC_RNG_CR |= RNG_CR_RNGEN; + t = 0; + guard = 0; + while (t < 4U && guard < STM32_BARE_RNG_BYTE_TIMEOUT) { + guard++; + if ((WC_RNG_SR & RNG_SR_DRDY) != 0U) { + (void)WC_RNG_DR; + t++; + } + else if ((WC_RNG_SR & + (RNG_SR_SECS | RNG_SR_CECS)) != 0U) { + /* Clock-error during recovery -- bail and + * let the outer retry handle it. */ + break; + } + } + } } wolfSSL_CryptHwMutexUnLock(); diff --git a/wolfssl/wolfcrypt/aes.h b/wolfssl/wolfcrypt/aes.h index e3d7637470d..965c1fd7612 100644 --- a/wolfssl/wolfcrypt/aes.h +++ b/wolfssl/wolfcrypt/aes.h @@ -301,10 +301,6 @@ struct Aes { #endif #ifdef HAVE_AESGCM Gcm gcm; -#ifdef WOLFSSL_STM32U5_DHUK - byte dhukIV[16]; /* Used when unwrapping an encrypted key */ - int dhukIVLen; -#endif #ifdef WOLFSSL_SE050 sss_symmetric_t aes_ctx; /* used as the function context */ int ctxInitDone; @@ -341,7 +337,7 @@ struct Aes { byte use_sha3_hw_crypto; #endif #endif /* __aarch64__ && WOLFSSL_ARMASM && !WOLFSSL_ARMASM_NO_HW_CRYPTO */ -#if defined(WOLF_CRYPTO_CB) || defined(WOLFSSL_STM32U5_DHUK) +#if defined(WOLF_CRYPTO_CB) int devId; void* devCtx; /* Opaque handle for CryptoCB device */ #endif diff --git a/wolfssl/wolfcrypt/port/st/stm32.h b/wolfssl/wolfcrypt/port/st/stm32.h index 9aa0d418ae1..54784dd6181 100644 --- a/wolfssl/wolfcrypt/port/st/stm32.h +++ b/wolfssl/wolfcrypt/port/st/stm32.h @@ -23,11 +23,508 @@ #define _WOLFPORT_STM32_H_ /* Generic STM32 Hashing and Crypto Functions */ -/* Supports CubeMX HAL or Standard Peripheral Library */ +/* Supports CubeMX HAL, Standard Peripheral Library, or bare-metal direct + * register access (WOLFSSL_STM32_BARE). 
*/ #include #include /* for MATH_INT_T */ +/* STM32H563 has a reduced "light" PKA: it performs ECDSA signature + * verification but not signing (per ST -- the H563 datasheet sec. 3.32 + * lists ECDSA verification only; H573 supports full sign + verify). + * Auto-enable verify-only so wc_ecc_sign_hash routes to software while + * wc_ecc_verify_hash stays on the HW PKA. STM32H573xx keeps full PKA. + * Define WC_STM32_PKA_VERIFY_ONLY yourself for any other verify-only part. */ +#if defined(WOLFSSL_STM32_PKA) && defined(STM32H563xx) && \ + !defined(STM32H573xx) && !defined(WC_STM32_PKA_VERIFY_ONLY) + #define WC_STM32_PKA_VERIFY_ONLY +#endif + +#ifdef WOLFSSL_STM32_BARE +/* Per-family direct-register clock-enable macros. CMSIS device header is + * already included via settings.h. RCC->...ENR bit names come from CMSIS. + * + * Clock enable/disable share one idiom across families: OR (enable) or + * AND-NOT (disable) the RCC enable register, with a read-back after enable + * so the bit is committed before the peripheral is touched. WC_STM32_CLK_EN + * / WC_STM32_CLK_DIS centralize that idiom; each family arm below just maps + * its peripheral macros onto the right RCC register + bit. (The MP13 arm + * keeps its own form -- it uses separate set/clear registers.) 
*/ +#define WC_STM32_CLK_EN(reg, bit) \ + do { RCC->reg |= (bit); (void)RCC->reg; } while (0) +#define WC_STM32_CLK_DIS(reg, bit) \ + do { RCC->reg &= ~(bit); } while (0) + +#if defined(WOLFSSL_STM32H5) + #define WC_STM32_AES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_AESEN) + #define WC_STM32_AES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR, RCC_AHB2ENR_AESEN) + #ifdef RCC_AHB2ENR_SAESEN + #define WC_STM32_SAES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_SAESEN) + #define WC_STM32_SAES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR, RCC_AHB2ENR_SAESEN) + #endif + #define WC_STM32_HASH_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_HASHEN) + #define WC_STM32_HASH_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR, RCC_AHB2ENR_HASHEN) + #define WC_STM32_RNG_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_RNGEN) +#elif defined(WOLFSSL_STM32F2) || defined(WOLFSSL_STM32F4) || \ + defined(WOLFSSL_STM32F7) || defined(WOLFSSL_STM32H7) + /* F2/F4/F7/H7 -- CRYP + HASH + RNG all on AHB2 with identical + * RCC bit names. */ + #define WC_STM32_AES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_CRYPEN) + #define WC_STM32_AES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR, RCC_AHB2ENR_CRYPEN) + #define WC_STM32_HASH_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_HASHEN) + #define WC_STM32_HASH_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR, RCC_AHB2ENR_HASHEN) + #define WC_STM32_RNG_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_RNGEN) +#elif defined(WOLFSSL_STM32MP13) + /* MP13 -- CRYP1/HASH1/RNG1 on AHB5; CMSIS device header may use + * symbol-suffixed RCC names. Gate each macro on the CMSIS bit so + * a partial device header still compiles. Separate set/clear + * registers, so this arm keeps its own form. 
*/ + #if defined(RCC_MP_AHB5ENSETR_CRYP1EN) + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->MP_AHB5ENSETR |= RCC_MP_AHB5ENSETR_CRYP1EN; \ + (void)RCC->MP_AHB5ENSETR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->MP_AHB5ENCLRR = RCC_MP_AHB5ENSETR_CRYP1EN; } while (0) + #endif + #if defined(RCC_MP_AHB5ENSETR_HASH1EN) + #define WC_STM32_HASH_CLK_ENABLE() \ + do { RCC->MP_AHB5ENSETR |= RCC_MP_AHB5ENSETR_HASH1EN; \ + (void)RCC->MP_AHB5ENSETR; } while (0) + #define WC_STM32_HASH_CLK_DISABLE() \ + do { RCC->MP_AHB5ENCLRR = RCC_MP_AHB5ENSETR_HASH1EN; } while (0) + #endif + #if defined(RCC_MP_AHB5ENSETR_RNG1EN) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->MP_AHB5ENSETR |= RCC_MP_AHB5ENSETR_RNG1EN; \ + (void)RCC->MP_AHB5ENSETR; } while (0) + #endif +#elif defined(WOLFSSL_STM32L4) + #define WC_STM32_AES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_AESEN) + #define WC_STM32_AES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR, RCC_AHB2ENR_AESEN) + #define WC_STM32_HASH_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_HASHEN) + #define WC_STM32_HASH_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR, RCC_AHB2ENR_HASHEN) + #define WC_STM32_RNG_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_RNGEN) +#elif defined(WOLFSSL_STM32L5) + /* L5: HASH + RNG on AHB2 (L552). L562 also adds AES + PKA. AES + * clock-enable is gated on the CMSIS symbol so headers that don't + * expose AESEN (L552) skip the define. 
*/ + #ifdef RCC_AHB2ENR_AESEN + #define WC_STM32_AES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_AESEN) + #define WC_STM32_AES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR, RCC_AHB2ENR_AESEN) + #endif + #define WC_STM32_HASH_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_HASHEN) + #define WC_STM32_HASH_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR, RCC_AHB2ENR_HASHEN) + #define WC_STM32_RNG_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_RNGEN) +#elif defined(WOLFSSL_STM32U5) || defined(WOLFSSL_STM32U3) + /* U5 / U3 RCC uses AHB2ENR1 (not AHB2ENR). AES bit only present on + * variants that have the peripheral (U585+, U385+). SAES is on the + * same AHB2ENR1; gate on the CMSIS bit so headers without it (e.g. + * U575 which has neither AES nor SAES) skip. */ + #ifdef RCC_AHB2ENR1_AESEN + #define WC_STM32_AES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR1, RCC_AHB2ENR1_AESEN) + #define WC_STM32_AES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR1, RCC_AHB2ENR1_AESEN) + #endif + #ifdef RCC_AHB2ENR1_SAESEN + #define WC_STM32_SAES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR1, RCC_AHB2ENR1_SAESEN) + #define WC_STM32_SAES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR1, RCC_AHB2ENR1_SAESEN) + #endif + #ifdef RCC_AHB2ENR1_HASHEN + #define WC_STM32_HASH_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR1, RCC_AHB2ENR1_HASHEN) + #define WC_STM32_HASH_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR1, RCC_AHB2ENR1_HASHEN) + #endif + #define WC_STM32_RNG_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR1, RCC_AHB2ENR1_RNGEN) +#elif defined(WOLFSSL_STM32G0) + #define WC_STM32_AES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHBENR, RCC_AHBENR_AESEN) + #define WC_STM32_AES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHBENR, RCC_AHBENR_AESEN) + #define WC_STM32_RNG_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHBENR, RCC_AHBENR_RNGEN) +#elif defined(WOLFSSL_STM32WB) + /* WB55 dual-core: AES1 is the M4 (CPU1) application AES, on AHB2. + * AES2 sits on AHB4/AHB3 and is reserved for the M0+ side / shared use. 
+ * The wolfcrypt port maps CRYP -> AES1 (see CRYP alias above), so use + * AES1's clock-enable bit. RNG is on AHB3. */ + #define WC_STM32_AES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_AES1EN) + #define WC_STM32_AES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR, RCC_AHB2ENR_AES1EN) + #define WC_STM32_RNG_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB3ENR, RCC_AHB3ENR_RNGEN) +#elif defined(WOLFSSL_STM32WL) + /* WL55 dual-core: TinyAES + RNG + PKA on M4 (CPU1) side. AES on AHB3, + * RNG on AHB3, PKA on AHB3. No HASH peripheral. V1 PKA layout. */ + #ifdef RCC_AHB3ENR_AESEN + #define WC_STM32_AES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB3ENR, RCC_AHB3ENR_AESEN) + #define WC_STM32_AES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB3ENR, RCC_AHB3ENR_AESEN) + #endif + #define WC_STM32_RNG_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB3ENR, RCC_AHB3ENR_RNGEN) +#elif defined(WOLFSSL_STM32G4) + /* G4: TinyAES + RNG + PKA on AHB2. No HASH peripheral. */ + #define WC_STM32_AES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_AESEN) + #define WC_STM32_AES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR, RCC_AHB2ENR_AESEN) + #define WC_STM32_RNG_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_RNGEN) +#elif defined(WOLFSSL_STM32WBA) + /* WBA: TinyAES + HASH + RNG + PKA + SAES on AHB2.
*/ + #define WC_STM32_AES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_AESEN) + #define WC_STM32_AES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR, RCC_AHB2ENR_AESEN) + #ifdef RCC_AHB2ENR_SAESEN + #define WC_STM32_SAES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_SAESEN) + #define WC_STM32_SAES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR, RCC_AHB2ENR_SAESEN) + #endif + #ifdef RCC_AHB2ENR_HASHEN + #define WC_STM32_HASH_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_HASHEN) + #define WC_STM32_HASH_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR, RCC_AHB2ENR_HASHEN) + #endif + #define WC_STM32_RNG_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_RNGEN) +#elif defined(WOLFSSL_STM32C5) + /* C5: TinyAES + HASH + RNG + SAES + PKA all on AHB2. New-gen HASH IP + * (4-bit ALGO field, same as H5/U3/N6). */ + #define WC_STM32_AES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_AESEN) + #define WC_STM32_AES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR, RCC_AHB2ENR_AESEN) + #ifdef RCC_AHB2ENR_SAESEN + #define WC_STM32_SAES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_SAESEN) + #define WC_STM32_SAES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR, RCC_AHB2ENR_SAESEN) + #endif + #define WC_STM32_HASH_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_HASHEN) + #define WC_STM32_HASH_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR, RCC_AHB2ENR_HASHEN) + #define WC_STM32_RNG_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_RNGEN) +#elif defined(WOLFSSL_STM32U0) + /* U0: Cortex-M0+ low-end. AES + RNG only (no SAES, no HASH, no PKA, + * no CRYP). Both on the single AHBENR. TinyAES IP, KEYSIZE field + * for 128/256-bit. */ + #define WC_STM32_AES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHBENR, RCC_AHBENR_AESEN) + #define WC_STM32_AES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHBENR, RCC_AHBENR_AESEN) + #define WC_STM32_RNG_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHBENR, RCC_AHBENR_RNGEN) +#elif defined(WOLFSSL_STM32N6) + /* N6: CRYP + HASH + RNG + SAES + PKA all on AHB3. 
Note that on N6 + * the AES IP is the older "fat" CRYP (with AAD/header handling in + * register) -- SAES is the newer TinyAES-shape IP and is the one + * routed by the BARE driver when WOLFSSL_STM32_USE_SAES is set. */ + #ifdef RCC_AHB3ENR_CRYPEN + #define WC_STM32_AES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB3ENR, RCC_AHB3ENR_CRYPEN) + #define WC_STM32_AES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB3ENR, RCC_AHB3ENR_CRYPEN) + #endif + #ifdef RCC_AHB3ENR_SAESEN + #define WC_STM32_SAES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB3ENR, RCC_AHB3ENR_SAESEN) + #define WC_STM32_SAES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB3ENR, RCC_AHB3ENR_SAESEN) + #endif + #define WC_STM32_HASH_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB3ENR, RCC_AHB3ENR_HASHEN) + #define WC_STM32_HASH_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB3ENR, RCC_AHB3ENR_HASHEN) + #define WC_STM32_RNG_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB3ENR, RCC_AHB3ENR_RNGEN) +#elif defined(WOLFSSL_STM32H7S) + /* H7RS/H7S3: classic H7 fat CRYP + classic H7 HASH (same register + * shapes as H753) but RCC clock-enable bits moved to AHB3ENR, and + * V2 PKA + SAES added. All five (CRYP/HASH/RNG/SAES/PKA) live on + * AHB3ENR. */ + #ifdef RCC_AHB3ENR_CRYPEN + #define WC_STM32_AES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB3ENR, RCC_AHB3ENR_CRYPEN) + #define WC_STM32_AES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB3ENR, RCC_AHB3ENR_CRYPEN) + #endif + #ifdef RCC_AHB3ENR_SAESEN + #define WC_STM32_SAES_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB3ENR, RCC_AHB3ENR_SAESEN) + #define WC_STM32_SAES_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB3ENR, RCC_AHB3ENR_SAESEN) + #endif + #define WC_STM32_HASH_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB3ENR, RCC_AHB3ENR_HASHEN) + #define WC_STM32_HASH_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB3ENR, RCC_AHB3ENR_HASHEN) + #define WC_STM32_RNG_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB3ENR, RCC_AHB3ENR_RNGEN) +#endif + +/* Build-time AES IP instance selector. 
Default is the regular AES/CRYP + * peripheral; defining WOLFSSL_STM32_USE_SAES routes the BARE TinyAES + * register-access block to SAES (Secure AES) instead. The TinyAES + * register layout is identical for AES and SAES on H5/U5/WBA/C5. */ +#ifndef WC_STM32_AES_INST + #if defined(WOLFSSL_STM32_USE_SAES) && defined(SAES) + #define WC_STM32_AES_INST SAES + #elif defined(CRYP) + #define WC_STM32_AES_INST CRYP + #elif defined(AES) + /* AES-only chips (G0/L0/U0) -- no fat CRYP, no SAES. The + * symbol CRYP is the legacy alias many newer family headers + * keep for compatibility, but bottom-end chips drop it. */ + #define WC_STM32_AES_INST AES + #elif defined(WOLFSSL_STM32WB) && defined(AES1) + /* WB55 dual-core: CMSIS exposes AES1 (M4-side application AES) + * and AES2 (M0+ Cortex-M0+ radio-side, not addressable from + * the application core). There is no plain `AES` or `CRYP` + * alias in the device header, so pick AES1 directly. */ + #define WC_STM32_AES_INST AES1 + #elif defined(STM32_CRYPTO) + /* Only error when the board has actually asked for STM32_CRYPTO + * but no AES IP is reachable. Chips with NO_STM32_CRYPTO (e.g. + * F767/G491 -- RNG-only parts) don't need an instance. */ + #error "STM32 BARE: no AES/CRYP/SAES instance pointer found" + #endif +#endif + +/* Companion macro for the IP-instance clock enable. Routes to + * WC_STM32_SAES_CLK_ENABLE when WOLFSSL_STM32_USE_SAES is set and the + * family arm above provided the SAES variant; otherwise falls back to + * the regular AES clock. The two are separate AHB enable bits on + * H5/U5/WBA/C5, so toggling the wrong one leaves the IP disabled. 
*/ +#ifndef WC_STM32_AES_CLK_ENABLE_INST + #if defined(WOLFSSL_STM32_USE_SAES) && defined(WC_STM32_SAES_CLK_ENABLE) + #define WC_STM32_AES_CLK_ENABLE_INST() WC_STM32_SAES_CLK_ENABLE() + #else + #define WC_STM32_AES_CLK_ENABLE_INST() WC_STM32_AES_CLK_ENABLE() + #endif +#endif + +/* Some new-gen chips (STM32N6, STM32H7S3) ship CMSIS headers that + * define SAES_TypeDef but not AES_TypeDef. The BARE TinyAES driver + * helpers in stm32.c declare their parameter as `AES_TypeDef*` so the + * same function pointer can target both the regular AES and SAES + * instances on chips that have both. Provide a typedef alias when AES + * is missing -- the IP layout is identical between AES and SAES on + * every family in scope. Gated on having SAES but not the AES_CR_EN + * symbol (used as a sentinel that the CMSIS lacks the AES alias). */ +#if defined(SAES) && !defined(AES_CR_EN) && \ + !defined(WOLFSSL_STM32_AES_TYPEDEF_ALIAS) + typedef SAES_TypeDef AES_TypeDef; + #define WOLFSSL_STM32_AES_TYPEDEF_ALIAS +#endif + +/* SAES-only chips (e.g. STM32N6) have the TinyAES register layout but + * the CMSIS device header only defines SAES_CR_*, SAES_SR_*, SAES_ISR_*, + * SAES_ICR_* without companion AES_CR_* aliases. The BARE driver in + * stm32.c uses the AES_CR_* names directly; provide aliases here so the + * existing code compiles on SAES-only parts. 
*/ +#if !defined(AES_CR_EN) && defined(SAES_CR_EN) + #define AES_CR_EN SAES_CR_EN + #define AES_CR_DATATYPE_1 SAES_CR_DATATYPE_1 + #define AES_CR_MODE SAES_CR_MODE + #define AES_CR_MODE_M SAES_CR_MODE_Msk + #define AES_CR_MODE_Msk SAES_CR_MODE_Msk + #define AES_CR_MODE_0 SAES_CR_MODE_0 + #define AES_CR_MODE_1 SAES_CR_MODE_1 + #define AES_CR_CHMOD_0 SAES_CR_CHMOD_0 + #define AES_CR_CHMOD_1 SAES_CR_CHMOD_1 + #define AES_CR_KEYSIZE SAES_CR_KEYSIZE + #define AES_CR_KEYSEL_0 SAES_CR_KEYSEL_0 + #define AES_CR_KMOD_0 SAES_CR_KMOD_0 + #define AES_CR_KEYPROT SAES_CR_KEYPROT + #define AES_CR_CCFC SAES_CR_CCFC + #define AES_CR_IPRST SAES_CR_IPRST + #define AES_SR_BUSY SAES_SR_BUSY + #define AES_SR_CCF SAES_SR_CCF + #define AES_ISR_CCF SAES_ISR_CCF + #define AES_ICR_CCF SAES_ICR_CCF +#endif + +/* Per-family direct-register clock-enable macro for the PKA peripheral. */ +#if defined(WOLFSSL_STM32WB) || defined(WOLFSSL_STM32WL) + /* WB55 / WL55: PKA clock is on AHB3 (V1 layout) */ + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB3ENR |= RCC_AHB3ENR_PKAEN; (void)RCC->AHB3ENR; } while (0) +#elif defined(WOLFSSL_STM32U5) || defined(WOLFSSL_STM32U3) + /* U5 / U3: AHB2ENR1.PKAEN */ + #ifdef RCC_AHB2ENR1_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB2ENR1 |= RCC_AHB2ENR1_PKAEN; (void)RCC->AHB2ENR1; } \ + while (0) + #endif +#elif defined(WOLFSSL_STM32H5) + #ifdef RCC_AHB2ENR_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_PKAEN; (void)RCC->AHB2ENR; } \ + while (0) + #endif +#elif defined(WOLFSSL_STM32L5) + /* L5: PKA on AHB2ENR.PKAEN (bit 19). Only present on L562/L592 + * variants -- L552 has no PKA so the CMSIS bit is absent. 
*/ + #ifdef RCC_AHB2ENR_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_PKAEN; (void)RCC->AHB2ENR; } \ + while (0) + #endif +#elif defined(WOLFSSL_STM32G4) + #ifdef RCC_AHB2ENR_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_PKAEN; (void)RCC->AHB2ENR; } \ + while (0) + #endif +#elif defined(WOLFSSL_STM32WBA) + /* WBA52: PKA on AHB2ENR.PKAEN bit 21 (NOT AHB1 like the rest of + * the WBA crypto IPs in some other variants -- the WBA52 RM places + * PKA, SAES, RNG all on AHB2 alongside other crypto). The earlier + * AHB1 placement was a copy-paste error from another family; + * `RCC_AHB1ENR_PKAEN` doesn't exist on WBA52 so the macro never + * defined, the clock never got enabled, and HAL_PKA_Init timed + * out at the CR.EN-stick check (CR readback = 0 = clock gated). */ + #ifdef RCC_AHB2ENR_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_PKAEN; (void)RCC->AHB2ENR; } \ + while (0) + #endif +#elif defined(WOLFSSL_STM32C5) + #ifdef RCC_AHB2ENR_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_PKAEN; (void)RCC->AHB2ENR; } \ + while (0) + #endif +#elif defined(WOLFSSL_STM32N6) + /* N6: PKA on AHB3 (same bank as HASH/RNG/CRYP/SAES). V2 layout. */ + #ifdef RCC_AHB3ENR_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB3ENR |= RCC_AHB3ENR_PKAEN; (void)RCC->AHB3ENR; } \ + while (0) + #endif +#elif defined(WOLFSSL_STM32H7S) + /* H7S: PKA on AHB3 alongside HASH/RNG/CRYP/SAES. V2 layout. */ + #ifdef RCC_AHB3ENR_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB3ENR |= RCC_AHB3ENR_PKAEN; (void)RCC->AHB3ENR; } \ + while (0) + #endif +#endif + +/* HAL-legacy macros that the existing direct-register HASH path depends on. + * Without HAL these aren't otherwise visible. 
*/ +#if defined(WOLFSSL_STM32H5) || defined(WOLFSSL_STM32MP13) || \ + defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H7S) || \ + defined(WOLFSSL_STM32U3) || defined(WOLFSSL_STM32C5) + /* New-generation HASH IP. The CMSIS struct shape varies within the + * family list -- H5 renames the instance digest registers from + * `HR[5]` to `HRA[5]`, but U3 / N6 keep the legacy `HR[5]` name + * even though the IP otherwise behaves like the new generation. + * Gate the macro on H5 only (verified by inspection of each + * family's CMSIS header). */ + #if defined(WOLFSSL_STM32H5) + #define WC_STM32_HASH_INSTANCE_HRA + #endif + /* 4-bit ALGO field at bits 20:17 */ + #define HASH_ALGOSELECTION_SHA1 0u + #define HASH_ALGOSELECTION_SHA224 HASH_CR_ALGO_1 + #define HASH_ALGOSELECTION_SHA256 (HASH_CR_ALGO_0 | HASH_CR_ALGO_1) + #define HASH_ALGOSELECTION_SHA384 (HASH_CR_ALGO_2 | HASH_CR_ALGO_3) + #define HASH_ALGOSELECTION_SHA512 (HASH_CR_ALGO_0 | HASH_CR_ALGO_1 | \ + HASH_CR_ALGO_2 | HASH_CR_ALGO_3) + #define HASH_ALGOSELECTION_SHA512_224 (HASH_CR_ALGO_0 | HASH_CR_ALGO_2 | \ + HASH_CR_ALGO_3) + #define HASH_ALGOSELECTION_SHA512_256 (HASH_CR_ALGO_1 | HASH_CR_ALGO_2 | \ + HASH_CR_ALGO_3) +#else + /* Older HASH IP (F4/F7/L4 family) ALGO bit mapping (per HAL): + * SHA1 = 0 + * MD5 = ALGO_0 + * SHA224 = ALGO_1 + * SHA256 = ALGO_0 | ALGO_1 + */ + #define HASH_ALGOSELECTION_SHA1 0u + #define HASH_ALGOSELECTION_MD5 HASH_CR_ALGO_0 + #ifdef HASH_CR_ALGO_1 + #define HASH_ALGOSELECTION_SHA224 HASH_CR_ALGO_1 + #define HASH_ALGOSELECTION_SHA256 (HASH_CR_ALGO_0 | HASH_CR_ALGO_1) + #endif +#endif + +/* Legacy CamelCase aliases */ +#ifdef HASH_ALGOSELECTION_SHA1 + #define HASH_AlgoSelection_SHA1 HASH_ALGOSELECTION_SHA1 +#endif +#ifdef HASH_ALGOSELECTION_SHA224 + #define HASH_AlgoSelection_SHA224 HASH_ALGOSELECTION_SHA224 +#endif +#ifdef HASH_ALGOSELECTION_SHA256 + #define HASH_AlgoSelection_SHA256 HASH_ALGOSELECTION_SHA256 +#endif +#ifdef HASH_ALGOSELECTION_SHA384 + #define HASH_AlgoSelection_SHA384 
HASH_ALGOSELECTION_SHA384 +#endif +#ifdef HASH_ALGOSELECTION_SHA512 + #define HASH_AlgoSelection_SHA512 HASH_ALGOSELECTION_SHA512 +#endif +#ifdef HASH_ALGOSELECTION_SHA512_224 + #define HASH_AlgoSelection_SHA512_224 HASH_ALGOSELECTION_SHA512_224 +#endif +#ifdef HASH_ALGOSELECTION_SHA512_256 + #define HASH_AlgoSelection_SHA512_256 HASH_ALGOSELECTION_SHA512_256 +#endif +#ifdef HASH_ALGOSELECTION_MD5 + #define HASH_AlgoSelection_MD5 HASH_ALGOSELECTION_MD5 +#endif + +#define HASH_ALGOMODE_HASH 0u +#ifdef HASH_CR_MODE + #define HASH_ALGOMODE_HMAC HASH_CR_MODE +#endif +/* Byte-stream input (auto byte-swap) */ +#ifdef HASH_CR_DATATYPE_1 + #define HASH_DATATYPE_8B HASH_CR_DATATYPE_1 +#elif defined(HASH_CR_DATATYPE_0) + #define HASH_DATATYPE_8B HASH_CR_DATATYPE_0 +#endif + +#endif /* WOLFSSL_STM32_BARE */ + + #ifdef STM32_HASH #include /* for uint32_t */ @@ -38,7 +535,8 @@ /* The HASH_DIGEST register indicates SHA224/SHA256 support */ #define STM32_HASH_SHA2 #if defined(WOLFSSL_STM32MP13) || defined(WOLFSSL_STM32H7S) || \ - defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H5) + defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H5) || \ + defined(WOLFSSL_STM32U3) #define HASH_CR_SIZE 103 #define HASH_MAX_DIGEST 64 /* Up to SHA512 */ @@ -68,7 +566,8 @@ /* These HASH HAL's have no MD5 implementation */ #if defined(WOLFSSL_STM32MP13) || defined(WOLFSSL_STM32H7S) || \ - defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H5) + defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H5) || \ + defined(WOLFSSL_STM32U3) || defined(WOLFSSL_STM32C5) #define STM32_NOMD5 #endif @@ -163,7 +662,8 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, #endif #ifndef NO_AES - #if !defined(STM32_CRYPTO_AES_GCM) && (defined(WOLFSSL_STM32F4) || \ + #if !defined(STM32_CRYPTO_AES_GCM) && !defined(WOLFSSL_STM32_BARE) && \ + (defined(WOLFSSL_STM32F4) || \ defined(WOLFSSL_STM32F7) || defined(WOLFSSL_STM32L4) || \ defined(WOLFSSL_STM32L5) || defined(WOLFSSL_STM32H7) || \ defined(WOLFSSL_STM32U5) 
|| defined(WOLFSSL_STM32U3) || \ @@ -173,6 +673,13 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, /* Hardware supports AES GCM acceleration */ #define STM32_CRYPTO_AES_GCM #endif + /* Under WOLFSSL_STM32_BARE on the CRYP IP (F2/F4/F7/H7/MP13), the GCM + * HW phase machine (init/header/payload/final) is engaged for whole- + * block PT with a 12-byte IV; partial blocks and non-12B IVs return + * CRYPTOCB_UNAVAILABLE so aes.c falls back to SW GHASH + HW ECB. On + * the TinyAES IP the BARE driver always returns CRYPTOCB_UNAVAILABLE + * for GCM (no HW phase machine) and the SW GHASH + HW ECB path is + * used. GCM decrypt is always SW + HW ECB on both IPs in v1. */ #if defined(WOLFSSL_STM32WB) || defined(WOLFSSL_STM32WL) || \ defined(WOLFSSL_STM32WBA) @@ -186,12 +693,14 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, #endif #if defined(WOLFSSL_STM32L4) || defined(WOLFSSL_STM32L5) || \ defined(WOLFSSL_STM32U5) || defined(WOLFSSL_STM32U3) || \ - defined(WOLFSSL_STM32H5) || defined(WOLFSSL_STM32G0) + defined(WOLFSSL_STM32H5) || defined(WOLFSSL_STM32G0) || \ + defined(WOLFSSL_STM32G4) || defined(WOLFSSL_STM32C5) #if defined(WOLFSSL_STM32L4) || defined(WOLFSSL_STM32U5) || \ - defined(WOLFSSL_STM32U3) || defined(WOLFSSL_STM32G0) + defined(WOLFSSL_STM32U3) || defined(WOLFSSL_STM32G0) || \ + defined(WOLFSSL_STM32G4) || defined(WOLFSSL_STM32C5) #define STM32_CRYPTO_AES_ONLY /* crypto engine only supports AES */ #endif - #if defined(WOLFSSL_STM32H5) + #if defined(WOLFSSL_STM32H5) || defined(WOLFSSL_STM32C5) #define __HAL_RCC_CRYP_CLK_DISABLE __HAL_RCC_AES_CLK_DISABLE #define __HAL_RCC_CRYP_CLK_ENABLE __HAL_RCC_AES_CLK_ENABLE #endif @@ -234,7 +743,23 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, #define STM32_GCM_IV_START 2 struct Aes; - #ifdef WOLFSSL_STM32_CUBEMX + #ifdef WOLFSSL_STM32_BARE + /* Bare-metal direct-register AES driver. 
ECB and CBC are HW-native; + * CTR is provided automatically via the ECB-as-transform path in + * aes.c (XTRANSFORM_AESCTRBLOCK); GCM is HW-native for the case + * the CRYP IP supports (12-byte IV + whole-block PT) and returns + * CRYPTOCB_UNAVAILABLE otherwise so aes.c can fall back to SW + * GHASH (which still uses HW ECB for the underlying AES blocks). */ + int wc_Stm32_Aes_Ecb(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc); + int wc_Stm32_Aes_Cbc(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc); + int wc_Stm32_Aes_Gcm(struct Aes* aes, byte* out, const byte* in, + word32 sz, + const byte* iv, word32 ivSz, + byte* tag, word32 tagSz, + const byte* aad, word32 aadSz, int isEnc); + #elif defined(WOLFSSL_STM32_CUBEMX) int wc_Stm32_Aes_Init(struct Aes* aes, CRYP_HandleTypeDef* hcryp, int useSAES); void wc_Stm32_Aes_Cleanup(void); @@ -242,32 +767,24 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, int wc_Stm32_Aes_Init(struct Aes* aes, CRYP_InitTypeDef* cryptInit, CRYP_KeyInitTypeDef* keyInit); void wc_Stm32_Aes_Cleanup(void); - #endif /* WOLFSSL_STM32_CUBEMX */ + #endif /* WOLFSSL_STM32_BARE / WOLFSSL_STM32_CUBEMX / StdPeriph */ #endif /* !NO_AES */ #endif /* STM32_CRYPTO */ -#if defined(WOLFSSL_STM32U5_DHUK) && !defined(WOLFSSL_STM32U5_DHUK_DEVID) - #define WOLFSSL_STM32U5_DHUK_DEVID 808 - #define WOLFSSL_STM32U5_SAES_DEVID 807 - #define WOLFSSL_STM32U5_DHUK_WRAPPED_DEVID 809 - int wc_Stm32_Aes_Wrap(struct Aes* aes, const byte* in, word32 inSz, byte* out, - word32* outSz, const byte* iv, int ivSz); - int wc_Stm32_Aes_UnWrap(struct Aes* aes, CRYP_HandleTypeDef* hcryp, const byte* in, - word32 inSz, const byte* iv, int ivSz); - int wc_Stm32_Aes_SetDHUK_IV(struct Aes* aes, const byte* iv, int ivSz); +#ifdef WOLFSSL_STM32_BARE + /* Optional exact-key import primitive: unwrap a DHUK-wrapped key into SAES + * KEYR and ECB/CBC with it. _ex `isCbc`: 0=ECB, 1=CBC. 
Returns + * CRYPTOCB_UNAVAILABLE unless built with WOLFSSL_STM32_DHUK_UNWRAP. Not + * auto-routed -- call explicitly (DHUK uses the cryptocb path). */ + int wc_Stm32_Aes_DhukOp(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc); + int wc_Stm32_Aes_DhukOp_ex(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc, int isCbc); +#endif #endif #if defined(WOLFSSL_STM32_PKA) && defined(HAVE_ECC) -struct ecc_key; -struct WC_RNG; - -int stm32_ecc_verify_hash_ex(MATH_INT_T *r, MATH_INT_T *s, const byte* hash, - word32 hashlen, int* res, struct ecc_key* key); - -int stm32_ecc_sign_hash_ex(const byte* hash, word32 hashlen, struct WC_RNG* rng, - struct ecc_key* key, MATH_INT_T *r, MATH_INT_T *s); -#endif /* WOLFSSL_STM32_PKA && HAVE_ECC */ #endif /* _WOLFPORT_STM32_H_ */ diff --git a/wolfssl/wolfcrypt/settings.h b/wolfssl/wolfcrypt/settings.h index 9f699145847..eb2e2ba7541 100644 --- a/wolfssl/wolfcrypt/settings.h +++ b/wolfssl/wolfcrypt/settings.h @@ -2265,13 +2265,16 @@ #if defined(WOLFSSL_STM32F2) || defined(WOLFSSL_STM32F4) || \ defined(WOLFSSL_STM32F7) || defined(WOLFSSL_STM32F1) || \ + defined(WOLFSSL_STM32F3) || \ defined(WOLFSSL_STM32L4) || defined(WOLFSSL_STM32L5) || \ defined(WOLFSSL_STM32WB) || defined(WOLFSSL_STM32H7) || \ defined(WOLFSSL_STM32G0) || defined(WOLFSSL_STM32U5) || \ - defined(WOLFSSL_STM32U3) || defined(WOLFSSL_STM32H5) || \ + defined(WOLFSSL_STM32U3) || defined(WOLFSSL_STM32U0) || \ + defined(WOLFSSL_STM32H5) || \ defined(WOLFSSL_STM32WL) || defined(WOLFSSL_STM32G4) || \ defined(WOLFSSL_STM32MP13) || defined(WOLFSSL_STM32H7S) || \ - defined(WOLFSSL_STM32WBA) || defined(WOLFSSL_STM32N6) + defined(WOLFSSL_STM32WBA) || defined(WOLFSSL_STM32N6) || \ + defined(WOLFSSL_STM32C5) #define SIZEOF_LONG_LONG 8 #ifndef CHAR_BIT @@ -2293,8 +2296,17 @@ #if defined(WOLFSSL_STM32L4) || defined(WOLFSSL_STM32L5) || \ defined(WOLFSSL_STM32WB) || defined(WOLFSSL_STM32U5) || \ defined(WOLFSSL_STM32U3) || defined(WOLFSSL_STM32WL) || \ - 
defined(WOLFSSL_STM32WBA) - #define NO_AES_192 /* hardware does not support 192-bit */ + defined(WOLFSSL_STM32WBA) || defined(WOLFSSL_STM32C5) || \ + defined(WOLFSSL_STM32H5) || defined(WOLFSSL_STM32G4) || \ + defined(WOLFSSL_STM32G0) || defined(WOLFSSL_STM32U0) + #define NO_AES_192 /* TinyAES IP does not support 192-bit */ + #endif + /* STM32N6's default AES instance is the older "fat" CRYP, which DOES + * support AES-192. Only the BARE port that routes AES through the SAES + * (TinyAES, no 192-bit) lacks it -- so gate NO_AES_192 on that, leaving + * 192-bit available for N6 CubeMX/CRYP builds. */ + #if defined(WOLFSSL_STM32N6) && defined(WOLFSSL_STM32_USE_SAES) + #define NO_AES_192 #endif #endif #ifndef NO_STM32_HASH @@ -2309,7 +2321,72 @@ #define KEIL_INTRINSICS #endif #define NO_OLD_RNGNAME - #ifdef WOLFSSL_STM32_CUBEMX + + #if defined(WOLFSSL_STM32_BARE) && defined(WOLFSSL_STM32_CUBEMX) + #error "WOLFSSL_STM32_BARE and WOLFSSL_STM32_CUBEMX are mutually \ + exclusive" + #endif + /* WOLFSSL_STM32_PKA is now supported under WOLFSSL_STM32_BARE via the + * direct-register PKA driver in wolfcrypt/src/port/st/stm32.c. */ + + #ifdef WOLFSSL_STM32_BARE + /* Direct register access; no HAL or StdPeriph driver. Pull in only the + * CMSIS device header. Existing direct-register HASH path is reused; + * RNG goes through the existing WOLFSSL_STM32_RNG_NOLIB path. 
*/ + #ifndef WOLFSSL_STM32_RNG_NOLIB + #define WOLFSSL_STM32_RNG_NOLIB + #endif + #if defined(WOLFSSL_STM32F1) + #include "stm32f1xx.h" + #elif defined(WOLFSSL_STM32F2) + #include "stm32f2xx.h" + #elif defined(WOLFSSL_STM32F3) + #include "stm32f3xx.h" + #elif defined(WOLFSSL_STM32F4) + #include "stm32f4xx.h" + #elif defined(WOLFSSL_STM32F7) + #include "stm32f7xx.h" + #elif defined(WOLFSSL_STM32L4) + #include "stm32l4xx.h" + #elif defined(WOLFSSL_STM32L5) + #include "stm32l5xx.h" + #elif defined(WOLFSSL_STM32H7S) + #include "stm32h7rsxx.h" + #elif defined(WOLFSSL_STM32H7) + #include "stm32h7xx.h" + #elif defined(WOLFSSL_STM32WB) + #include "stm32wbxx.h" + #elif defined(WOLFSSL_STM32WL) + #include "stm32wlxx.h" + #elif defined(WOLFSSL_STM32G0) + #include "stm32g0xx.h" + #elif defined(WOLFSSL_STM32G4) + #include "stm32g4xx.h" + #elif defined(WOLFSSL_STM32U5) + #include "stm32u5xx.h" + #elif defined(WOLFSSL_STM32U3) + #include "stm32u3xx.h" + #elif defined(WOLFSSL_STM32U0) + #include "stm32u0xx.h" + #elif defined(WOLFSSL_STM32H5) + #include "stm32h5xx.h" + #elif defined(WOLFSSL_STM32C5) + #include "stm32c5xx.h" + #elif defined(WOLFSSL_STM32N6) + #include "stm32n6xx.h" + #elif defined(WOLFSSL_STM32MP13) + #ifndef __ASSEMBLER__ + #include "stm32mp13xx.h" + #endif + #elif defined(WOLFSSL_STM32WBA) + #include "stm32wbaxx.h" + #else + #error "WOLFSSL_STM32_BARE requires a STM32 family macro \ + (e.g. WOLFSSL_STM32F4, WOLFSSL_STM32H5, WOLFSSL_STM32U5, \ + WOLFSSL_STM32N6, ...). Define the matching family flag in \ + user_settings.h." 
+ #endif + #elif defined(WOLFSSL_STM32_CUBEMX) #if defined(WOLFSSL_STM32F1) #include "stm32f1xx_hal.h" #elif defined(WOLFSSL_STM32F2) @@ -2342,6 +2419,8 @@ #include "stm32u3xx_hal.h" #elif defined(WOLFSSL_STM32H5) #include "stm32h5xx_hal.h" + #elif defined(WOLFSSL_STM32C5) + #include "stm32c5xx_hal.h" #elif defined(WOLFSSL_STM32N6) #include "stm32n6xx_hal.h" #elif defined(WOLFSSL_STM32MP13) From dfdce31b4c4a6dae0f69cdd6300d399f49c5464e Mon Sep 17 00:00:00 2001 From: David Garske Date: Thu, 25 Jun 2026 12:38:11 -0700 Subject: [PATCH 2/3] Add STM32 DHUK, CCB and STM32C5 PKA/CCB support --- .wolfssl_known_macro_extras | 7 + .../header_files/doxygen_groups.h | 1 + doc/dox_comments/header_files/ecc.h | 101 + doc/dox_comments/header_files/stm32.h | 169 ++ wolfcrypt/src/aes.c | 42 +- wolfcrypt/src/ecc.c | 248 +- wolfcrypt/src/port/st/README.md | 143 +- wolfcrypt/src/port/st/stm32.c | 2632 ++++++++++++++++- wolfssl/wolfcrypt/ecc.h | 81 + wolfssl/wolfcrypt/port/st/stm32.h | 198 ++ 10 files changed, 3499 insertions(+), 123 deletions(-) create mode 100644 doc/dox_comments/header_files/stm32.h diff --git a/.wolfssl_known_macro_extras b/.wolfssl_known_macro_extras index 917a9958d0a..2a2c9668038 100644 --- a/.wolfssl_known_macro_extras +++ b/.wolfssl_known_macro_extras @@ -520,6 +520,7 @@ QAT_ENABLE_RNG QAT_USE_POLLING_CHECK RCC_AHB1ENR_PKAEN RCC_AHB2ENR1_AESEN +RCC_AHB2ENR1_CCBEN RCC_AHB2ENR1_HASHEN RCC_AHB2ENR1_PKAEN RCC_AHB2ENR1_SAESEN @@ -527,6 +528,7 @@ RCC_AHB2ENR_AESEN RCC_AHB2ENR_HASHEN RCC_AHB2ENR_PKAEN RCC_AHB2ENR_SAESEN +RCC_AHB2RSTR1_CCBRST RCC_AHB2RSTR_PKARST RCC_AHB3ENR_AESEN RCC_AHB3ENR_CRYPEN @@ -534,6 +536,7 @@ RCC_AHB3ENR_HASHEN RCC_AHB3ENR_PKAEN RCC_AHB3ENR_RNGEN RCC_AHB3ENR_SAESEN +RCC_CR_SHSION RCC_MP_AHB5ENSETR_CRYP1EN RCC_MP_AHB5ENSETR_HASH1EN RCC_MP_AHB5ENSETR_RNG1EN @@ -602,6 +605,7 @@ STM32F777xx STM32G071xx STM32G491xx STM32H563xx +STM32H573xx STM32H723xx STM32H725xx STM32H743xx @@ -983,6 +987,9 @@ WOLFSSL_STM32C5 WOLFSSL_STM32F3 WOLFSSL_STM32F427_RNG 
WOLFSSL_STM32U0 +WOLFSSL_STM32_CCB +WOLFSSL_STM32_DHUK_UNWRAP +WOLFSSL_STM32_USE_SAES WOLFSSL_STRONGEST_HASH_SIG WOLFSSL_STSAFE_TAKES_SLOT WOLFSSL_TELIT_M2MB diff --git a/doc/dox_comments/header_files/doxygen_groups.h b/doc/dox_comments/header_files/doxygen_groups.h index 1f308964f91..78614a3c3ba 100644 --- a/doc/dox_comments/header_files/doxygen_groups.h +++ b/doc/dox_comments/header_files/doxygen_groups.h @@ -308,4 +308,5 @@ \defgroup Setup wolfSSL Context and Session Set Up \defgroup IO wolfSSL Connection, Session, and I/O \defgroup Debug wolfSSL Error Handling and Reporting + \defgroup STM32 STM32 Hardware Crypto Port */ diff --git a/doc/dox_comments/header_files/ecc.h b/doc/dox_comments/header_files/ecc.h index 45d75f3449a..e103555780b 100644 --- a/doc/dox_comments/header_files/ecc.h +++ b/doc/dox_comments/header_files/ecc.h @@ -1283,6 +1283,107 @@ int wc_ecc_import_x963(const byte* in, word32 inLen, ecc_key* key); int wc_ecc_import_private_key(const byte* priv, word32 privSz, const byte* pub, word32 pubSz, ecc_key* key); +/*! + \ingroup ECC + + \brief This function imports an STM32 DHUK-protected private key onto an + ecc_key for transparent hardware signing. The private scalar is supplied as + a chip-bound wrapped blob together with the 256-bit derivation seed; the + plaintext scalar is never imported. The key must be bound to the STM32 DHUK + crypto-callback device (init with wc_ecc_init_ex(&key, heap, WC_DHUK_DEVID) + after registering the device with wc_Stm32_DhukRegister). Available only on + STM32 bare-metal builds (WOLFSSL_STM32_BARE) with WOLFSSL_DHUK, a + DHUK-capable SAES (WC_STM32_HAS_DHUK), and HW PKA signing enabled + (WOLFSSL_STM32_PKA, not WC_STM32_PKA_VERIFY_ONLY). + + \return 0 Returned on success. 
+ \return BAD_FUNC_ARG Returned if key, seed, or wrapped is NULL; if seedSz is + not 32; if wrappedLen is zero or not a multiple of the AES block size; if + wrappedLen exceeds the on-key blob buffer; if plainLen is zero or larger + than wrappedLen; or if wrappedLen is larger than plainLen padded to a full + AES block. + + \param key pointer to the ecc_key (bound to WC_DHUK_DEVID) to import into. + \param seed pointer to the 256-bit (32-byte) per-key DHUK derivation seed. + \param seedSz length of seed in bytes; must be 32. + \param wrapped pointer to the DHUK-wrapped private scalar blob. + \param wrappedLen length of the wrapped blob; a non-zero multiple of the AES + block size, no larger than the on-key buffer. + \param plainLen length in bytes of the plaintext scalar inside the blob. + + _Example_ + \code + ecc_key key; + wc_Stm32_DhukRegister(WC_DHUK_DEVID); + wc_ecc_init_ex(&key, NULL, WC_DHUK_DEVID); + if (wc_ecc_import_wrapped_private(&key, seed, 32, wrapped, wrappedLen, + plainLen) == 0) { + wc_ecc_sign_hash(hash, hashLen, sig, &sigLen, &rng, &key); + } + wc_ecc_free(&key); + \endcode + + \sa wc_ecc_import_wrapped_private_ex + \sa wc_ecc_sign_hash + \sa wc_ecc_init_ex +*/ +int wc_ecc_import_wrapped_private(ecc_key* key, const byte* seed, word32 seedSz, + const byte* wrapped, word32 wrappedLen, + word32 plainLen); + +/*! + \ingroup ECC + + \brief This function restores a previously provisioned STM32 CCB-protected + ECDSA key onto an ecc_key. The device-bound key is supplied as the wrapped + scalar blob plus its AES-GCM iv/tag and the in-clear public key; signing is + performed transparently with the scalar unwrapped SAES->PKA in hardware. The + key must be bound to the STM32 DHUK/CCB crypto-callback device (init with + wc_ecc_init_ex(&key, heap, WC_DHUK_DEVID)). Available only on STM32 builds + with WOLFSSL_DHUK and WOLFSSL_STM32_CCB. + + \return 0 Returned on success. 
+ \return BAD_FUNC_ARG Returned if key, wrapped, iv, tag, or pub is NULL; if + ivLen or tagLen is not 16; if curve_id is not a supported curve; if + wrappedLen is zero or exceeds the on-key blob buffer; or if pubLen is not + twice the curve modulus size. + \return <0 A negative error code may be returned if importing the public key + fails. + + \param key pointer to the ecc_key (bound to WC_DHUK_DEVID) to import into. + \param curve_id the ECC curve id of the wrapped key (e.g. ECC_SECP256R1). + \param wrapped pointer to the CCB wrapped private scalar blob. + \param wrappedLen length of the wrapped blob, no larger than the on-key + buffer. + \param iv pointer to the 16-byte AES-GCM iv of the blob. + \param ivLen length of iv in bytes; must be 16. + \param tag pointer to the 16-byte AES-GCM authentication tag of the blob. + \param tagLen length of tag in bytes; must be 16. + \param pub pointer to the public key in uncompressed qx||qy form. + \param pubLen length of pub in bytes; must be twice the curve modulus size. + + _Example_ + \code + ecc_key key; + wc_Stm32_DhukRegister(WC_DHUK_DEVID); + wc_ecc_init_ex(&key, NULL, WC_DHUK_DEVID); + if (wc_ecc_import_wrapped_private_ex(&key, ECC_SECP256R1, wrapped, + wrappedLen, iv, 16, tag, 16, pub, pubLen) == 0) { + wc_ecc_sign_hash(hash, hashLen, sig, &sigLen, &rng, &key); + } + wc_ecc_free(&key); + \endcode + + \sa wc_ecc_import_wrapped_private + \sa wc_ecc_make_key_ex + \sa wc_ecc_sign_hash +*/ +int wc_ecc_import_wrapped_private_ex(ecc_key* key, int curve_id, + const byte* wrapped, word32 wrappedLen, + const byte* iv, word32 ivLen, + const byte* tag, word32 tagLen, + const byte* pub, word32 pubLen); + /*! \ingroup ECC diff --git a/doc/dox_comments/header_files/stm32.h b/doc/dox_comments/header_files/stm32.h new file mode 100644 index 00000000000..40ccbf8645e --- /dev/null +++ b/doc/dox_comments/header_files/stm32.h @@ -0,0 +1,169 @@ +/*! 
+ \ingroup STM32 + + \brief This function registers the STM32 DHUK (Device Hardware Unique Key) + crypto-callback device. After registering at WC_DHUK_DEVID, bind an object + to the device by setting its devId at init (wc_AesInit / wc_ecc_init_ex) and + supply the per-key 256-bit seed as the key (wc_AesGcmSetKey) or via + wc_ecc_import_wrapped_private; normal wolfCrypt AES / GMAC / ECDSA calls then + run transparently with the working key derived inside the SAES. Available on + STM32 builds with WOLFSSL_DHUK, WOLF_CRYPTO_CB, and a DHUK-capable SAES + (WC_STM32_HAS_DHUK); on the CubeMX path it is also provided for CCB ECDSA. + + \return 0 Returned on success. + \return <0 A negative error code is returned if device registration fails. + + \param devId the crypto-callback device id to register (use WC_DHUK_DEVID). + + _Example_ + \code + Aes aes; + wc_Stm32_DhukRegister(WC_DHUK_DEVID); + wc_AesInit(&aes, NULL, WC_DHUK_DEVID); + wc_AesGcmSetKey(&aes, seed, 32); + wc_AesGcmEncrypt(&aes, NULL, NULL, 0, iv, ivSz, tag, tagSz, aad, aadSz); + wc_AesFree(&aes); + wc_Stm32_DhukUnRegister(WC_DHUK_DEVID); + \endcode + + \sa wc_Stm32_DhukUnRegister + \sa wc_ecc_import_wrapped_private +*/ +int wc_Stm32_DhukRegister(int devId); + +/*! + \ingroup STM32 + + \brief This function unregisters the STM32 DHUK crypto-callback device that + was registered with wc_Stm32_DhukRegister. Call it once transparent DHUK / CCB + operations are complete. + + \return none No return value. + + \param devId the crypto-callback device id to unregister (WC_DHUK_DEVID). + + _Example_ + \code + wc_Stm32_DhukUnRegister(WC_DHUK_DEVID); + \endcode + + \sa wc_Stm32_DhukRegister +*/ +void wc_Stm32_DhukUnRegister(int devId); + +/*! + \ingroup STM32 + + \brief This function performs a chip-bound DHUK AES key-wrap on the SAES + (KEYSEL=HW, deterministic output) and is retained for provisioning wrapped + key material. 
The wrap-key source is selected by aes->devId + (WOLFSSL_DHUK_DEVID for the hardware DHUK, otherwise a software key in + aes->key). An optional iv selects CBC instead of ECB. Available on STM32 + builds with WOLFSSL_DHUK and a DHUK-capable SAES. + + \return 0 Returned on success. + \return BAD_FUNC_ARG Returned if a required pointer is NULL, if inSz is not a + supported block size, if the iv is non-NULL with ivSz != 16, or (software-key + path) if the wrapping key length is not 16 or 32. + \return <0 A negative error code may be returned on a hardware error. + + \param aes pointer to an initialized Aes; aes->devId selects the wrap key. + \param in pointer to the input key bytes to wrap. + \param inSz length of in in bytes. + \param out pointer to the output buffer for the wrapped key. + \param outSz on input the size of out; on output the bytes written. + \param iv optional 16-byte iv; NULL selects ECB, non-NULL selects CBC. + \param ivSz length of iv in bytes when iv is non-NULL; must be 16. + + \sa wc_Stm32_DhukRegister +*/ +int wc_Stm32_Aes_Wrap(struct Aes* aes, const byte* in, word32 inSz, byte* out, + word32* outSz, const byte* iv, int ivSz); + +/*! + \ingroup STM32 + + \brief This function brings up the STM32 CCB (Coupling and Chaining Bridge) + peripheral and reports whether it is usable: it enables the CCB / PKA / SAES + / RNG clocks, pulse-resets the engines, waits for BUSY to clear, and checks + for an operation error. Bare-metal only (WOLFSSL_STM32_BARE). The transparent + CCB sign path calls this internally, so most callers do not invoke it + directly. Available on STM32 builds with WOLFSSL_STM32_CCB on CCB silicon + (STM32U3 or STM32C5). + + \return 0 Returned when the CCB is up and usable. + \return WC_TIMEOUT_E Returned if BUSY does not clear within the timeout. + \return <0 A negative error code is returned if the CCB reports an error. + + \param none This function takes no parameters. 
+ + \sa wc_Stm32_Ccb_EccMakeBlob + \sa wc_Stm32_Ccb_EccSign +*/ +int wc_Stm32_CcbInit(void); + +/*! + \ingroup STM32 + + \brief This function creates an STM32 CCB ECDSA-signature blob from a clear + private scalar on-device. The scalar is wrapped under the silicon DHUK; the + returned blob (iv[16] + tag[16] + wrapped scalar) and the derived public key + (pubX[32] / pubY[32]) can be persisted and later reloaded with + wc_ecc_import_wrapped_private_ex. The hardware self-verifies the blob before + returning. Currently P-256 (ECC_SECP256R1). Available on STM32 builds with + WOLFSSL_STM32_CCB on CCB silicon (STM32U3 or STM32C5). + + \return 0 Returned on success. + \return NOT_COMPILED_IN Returned if curveId is an unsupported curve. + \return BAD_FUNC_ARG Returned if a required pointer is NULL or dLen is wrong. + \return WC_TIMEOUT_E Returned if a hardware step times out. + + \param curveId the ECC curve id; currently ECC_SECP256R1. + \param d pointer to the clear private scalar to wrap. + \param dLen length of d in bytes. + \param iv output buffer for the 16-byte blob iv. + \param tag output buffer for the 16-byte blob authentication tag. + \param wrapped output buffer for the wrapped scalar. + \param wrappedSz on output the length of the wrapped scalar in bytes. + \param pubX output buffer for the 32-byte public key X coordinate. + \param pubY output buffer for the 32-byte public key Y coordinate. + + \sa wc_Stm32_Ccb_EccSign + \sa wc_ecc_import_wrapped_private_ex +*/ +int wc_Stm32_Ccb_EccMakeBlob(int curveId, const byte* d, word32 dLen, + byte* iv, byte* tag, byte* wrapped, word32* wrappedSz, + byte* pubX, byte* pubY); + +/*! + \ingroup STM32 + + \brief This function signs a hash with an STM32 CCB ECDSA blob. The private + scalar is unwrapped inside the hardware (SAES->PKA over the CCB local bus) + and never enters software or crosses the system bus. The (r, s) signature is + written to the caller's 32-byte buffers. Currently P-256 (ECC_SECP256R1). 
+ Available on STM32 builds with WOLFSSL_STM32_CCB on CCB silicon (STM32U3 or + STM32C5). Most callers reach this transparently via wc_ecc_sign_hash on a + WC_DHUK_DEVID key rather than calling it directly. + + \return 0 Returned on success. + \return NOT_COMPILED_IN Returned if curveId is an unsupported curve. + \return BAD_FUNC_ARG Returned if a required pointer is NULL. + \return WC_TIMEOUT_E Returned if a hardware step times out. + + \param curveId the ECC curve id; currently ECC_SECP256R1. + \param iv pointer to the 16-byte blob iv. + \param tag pointer to the 16-byte blob authentication tag. + \param wrapped pointer to the wrapped scalar from wc_Stm32_Ccb_EccMakeBlob. + \param wrappedSz length of wrapped in bytes. + \param hash pointer to the hash to sign. + \param hashSz length of hash in bytes. + \param r output buffer for the 32-byte signature r value. + \param s output buffer for the 32-byte signature s value. + + \sa wc_Stm32_Ccb_EccMakeBlob + \sa wc_ecc_sign_hash +*/ +int wc_Stm32_Ccb_EccSign(int curveId, const byte* iv, const byte* tag, + const byte* wrapped, word32 wrappedSz, const byte* hash, word32 hashSz, + byte* r, byte* s); diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index 155934c510e..8d99481ca63 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -236,6 +236,11 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits #ifdef WOLFSSL_STM32_BARE /* Bare-metal driver handles mutex, clock and key/IV internally. * DHUK is routed via the crypto-callback framework, not here. */ + #ifdef WC_DEBUG_CIPHER_LIFECYCLE + int ret = wc_debug_CipherLifecycleCheck(aes->CipherLifecycleTag, 0); + if (ret < 0) + return ret; + #endif return wc_Stm32_Aes_Ecb(aes, outBlock, inBlock, WC_AES_BLOCK_SIZE, 1); #else int ret = 0; @@ -346,6 +351,11 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits { #ifdef WOLFSSL_STM32_BARE /* DHUK is routed via the crypto-callback framework, not here. 
*/ + #ifdef WC_DEBUG_CIPHER_LIFECYCLE + int ret = wc_debug_CipherLifecycleCheck(aes->CipherLifecycleTag, 0); + if (ret < 0) + return ret; + #endif return wc_Stm32_Aes_Ecb(aes, outBlock, inBlock, WC_AES_BLOCK_SIZE, 0); #else int ret = 0; @@ -4154,6 +4164,18 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt(Aes* aes, const byte* inBlock, aes->keylen = keylen; aes->rounds = keylen/4 + 6; XMEMCPY(rk, userKey, keylen); + #ifdef WOLF_CRYPTO_CB + /* Keep a raw (non-reversed) copy for crypto-callback offload, e.g. the + * DHUK device reads the seed from devKey. Mirrors the generic + * wc_AesSetKey cryptocb path: only for a cryptocb-bound key, and reject + * an oversized key (matches the other devKey copy sites in this file). */ + if (aes->devId != INVALID_DEVID) { + if (keylen > sizeof(aes->devKey)) { + return BAD_FUNC_ARG; + } + XMEMCPY(aes->devKey, userKey, keylen); + } + #endif #if !defined(WOLFSSL_STM32_CUBEMX) || defined(STM32_HAL_V2) ByteReverseWords(rk, rk, keylen); #endif @@ -5720,8 +5742,17 @@ int wc_AesSetIV(Aes* aes, const byte* iv) if (sz == 0) { return 0; } - /* DHUK is routed via the crypto-callback framework, not here. - * wc_Stm32_Aes_Cbc processes whole blocks and ignores any sub-block +#if defined(WOLFSSL_DHUK) && defined(WOLF_CRYPTO_CB) + /* Transparent DHUK AES is ECB/GCM only (those route through the crypto + * callback). CBC has no crypto-callback entry on the BARE path, so a + * DHUK key -- devId == WC_DHUK_DEVID, where wc_AesSetKey stored the + * derivation seed in aes->key -- would run with the seed as the AES + * key. Reject rather than silently produce a non-device-bound result. */ + if (aes->devId == WC_DHUK_DEVID) { + return NOT_COMPILED_IN; + } +#endif + /* wc_Stm32_Aes_Cbc processes whole blocks and ignores any sub-block * remainder, matching the SW / CUBEMX CBC backends; define * WOLFSSL_AES_CBC_LENGTH_CHECKS (above) to reject a non-block-multiple * length with BAD_LENGTH_E instead. 
*/ @@ -5738,7 +5769,12 @@ int wc_AesSetIV(Aes* aes, const byte* iv) if (sz == 0) { return 0; } - /* DHUK is routed via the crypto-callback framework, not here. */ +#if defined(WOLFSSL_DHUK) && defined(WOLF_CRYPTO_CB) + /* DHUK keys are unsupported for CBC -- see wc_AesCbcEncrypt above. */ + if (aes->devId == WC_DHUK_DEVID) { + return NOT_COMPILED_IN; + } +#endif return wc_Stm32_Aes_Cbc(aes, out, in, sz, 0); } #endif /* HAVE_AES_DECRYPT */ diff --git a/wolfcrypt/src/ecc.c b/wolfcrypt/src/ecc.c index e0d77511eca..14c0c3e9494 100644 --- a/wolfcrypt/src/ecc.c +++ b/wolfcrypt/src/ecc.c @@ -246,7 +246,9 @@ ECC Curve Sizes: #include #endif -#if defined(WOLFSSL_STM32_PKA) +#if defined(WOLFSSL_STM32_PKA) || defined(WOLFSSL_STM32_CCB) + /* CCB without PKA still needs stm32.h so its consistency #errors (e.g. CCB + * requires DHUK + BARE/CUBEMX) are visible to this translation unit. */ #include <wolfssl/wolfcrypt/port/st/stm32.h> #endif @@ -280,8 +282,15 @@ ECC Curve Sizes: !defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_SILABS_SE_ACCEL) && \ !defined(WOLFSSL_KCAPI_ECC) && !defined(WOLFSSL_SE050) && \ !defined(WOLFSSL_XILINX_CRYPT_VERSAL) && \ - !defined(WOLFSSL_STM32_PKA) && \ + (!defined(WOLFSSL_STM32_PKA) || defined(WC_STM32_PKA_SIGN_ONLY)) && \ !defined(WOLFSSL_PSOC6_CRYPTO) + /* STM32 sign-only (e.g. C5): the HW PKA cannot run the integrated ECDSA + * verify (mode 0x26) correctly, so use the SW verify helper rather than the + * HW-accelerator sigRS path. Note the helper's scalar multiplications still + * run on the HW PKA generic-mul (stm32.c provides wc_ecc_mulmod_ex under + * !WC_STM32_PKA_VERIFY_ONLY) -- the C5 issue is specifically the verify-mode + * wrapper, not the point math, which is exercised and correct (the lead for + * a future real HW verify).
*/ #undef HAVE_ECC_VERIFY_HELPER #define HAVE_ECC_VERIFY_HELPER #endif @@ -7065,6 +7074,55 @@ int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng, return stm32_ecc_sign_hash_ex(in, inlen, rng, key, r, s); } +#if defined(WOLFSSL_DHUK) && defined(WOLFSSL_STM32_BARE) && \ + defined(WC_STM32_HAS_DHUK) +/* Import a hardware-wrapped ECC private scalar + its derivation seed onto the + * ecc_key for the DHUK crypto-callback sign path. The scalar is AES-encrypted + * (offline or on-chip) with the device key that the SAES derives from the seed; + * at sign time it is decrypted into a short-lived buffer. The devId is NOT set + * here -- enable the device by setting devId at init + * (wc_ecc_init_ex(&key, heap, WC_DHUK_DEVID)). See ecc.h for the contract. */ +int wc_ecc_import_wrapped_private(ecc_key* key, const byte* seed, word32 seedSz, + const byte* wrapped, word32 wrappedLen, + word32 plainLen) +{ + if (key == NULL || seed == NULL || wrapped == NULL) { + return BAD_FUNC_ARG; + } + /* Seed is the 256-bit DHUK derivation secret. */ + if (seedSz != sizeof(key->dhuk_seed)) { + return BAD_FUNC_ARG; + } + /* Wrapped scalar blob must be a non-zero multiple of one AES block. */ + if (wrappedLen == 0u || (wrappedLen % 16u) != 0u) { + return BAD_FUNC_ARG; + } + if (wrappedLen > sizeof(key->dhuk_wrapped_priv)) { + return BAD_FUNC_ARG; + } + /* Plain length must fit inside the wrapped blob and be non-zero. */ + if (plainLen == 0u || plainLen > wrappedLen) { + return BAD_FUNC_ARG; + } + /* Wrapped blob must be no larger than the plaintext padded up to a full + * AES block; a larger blob is malformed and would overrun the fixed-size + * unwrap buffer used during signing. 
*/ + if (wrappedLen > ((plainLen + 15u) & ~15u)) { + return BAD_FUNC_ARG; + } + XMEMCPY(key->dhuk_wrapped_priv, wrapped, wrappedLen); + XMEMCPY(key->dhuk_seed, seed, seedSz); + key->dhuk_wrapped_priv_len = wrappedLen; + key->dhuk_plain_priv_len = plainLen; + key->dhuk_seed_sz = seedSz; +#ifdef WOLFSSL_STM32_CCB + /* This is a DHUK seed-wrapped (non-CCB) scalar; clear any CCB blob + * routing left from a prior import so signing uses the DHUK path. */ + key->dhuk_is_ccb = 0; +#endif + return 0; +} +#endif /* WOLFSSL_DHUK && WOLFSSL_STM32_BARE && WC_STM32_HAS_DHUK */ #elif !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_ATECC608A) && \ !defined(WOLFSSL_MICROCHIP_TA100) && \ @@ -8055,6 +8113,175 @@ int wc_ecc_sign_set_k(const byte* k, word32 klen, ecc_key* key) #endif /* WOLFSSL_ECDSA_SET_K || WOLFSSL_ECDSA_SET_K_ONE_LOOP */ #endif /* WOLFSSL_ATECC508A && WOLFSSL_CRYPTOCELL */ +/* Guard must match the ecc.h prototype and the ccb_ / dhuk_ ecc_key struct + * members (both WOLFSSL_DHUK && WOLFSSL_STM32_CCB) -- the implementation must + * not be broader than the members it dereferences. */ +#if defined(WOLFSSL_DHUK) && defined(WOLFSSL_STM32_CCB) +/* Load a previously provisioned device-protected ECDSA blob (wrapped scalar + + * AES-GCM iv/tag) and its public key onto the ecc_key. Sets the curve so the + * sign path can derive the parameters, and marks the key for the device + * crypto-callback. The caller enables the device with + * wc_ecc_init_ex(&key, heap, WC_DHUK_DEVID). Generic name -- the wrapping is + * the STM32 CCB, but the surface is not CCB-specific. 
*/ +int wc_ecc_import_wrapped_private_ex(ecc_key* key, int curve_id, + const byte* wrapped, word32 wrappedLen, + const byte* iv, word32 ivLen, + const byte* tag, word32 tagLen, + const byte* pub, word32 pubLen) +{ + int modSz; + int ret; + + if (key == NULL || wrapped == NULL || iv == NULL || tag == NULL || + pub == NULL) { + return BAD_FUNC_ARG; + } + /* Fixed 16-byte AES-GCM iv/tag -- validate explicitly (no over-read). */ + if (ivLen != sizeof(key->ccb_iv) || tagLen != sizeof(key->ccb_tag)) { + return BAD_FUNC_ARG; + } + modSz = wc_ecc_get_curve_size_from_id(curve_id); + if (modSz <= 0) { + return BAD_FUNC_ARG; + } + /* The CCB hardware sign path (wc_Stm32_Ccb_EccSign) supports only P-256. + * Reject other curves here so an import cannot succeed for a key that can + * never be signed with, rather than failing later at sign time. */ + if (curve_id != ECC_SECP256R1) { + return BAD_FUNC_ARG; + } + if (wrappedLen == 0u || wrappedLen > sizeof(key->dhuk_wrapped_priv)) { + return BAD_FUNC_ARG; + } + if (pubLen != (word32)(2 * modSz)) { + return BAD_FUNC_ARG; + } + /* Import public key (qx||qy) + set the curve (key->dp). */ + ret = wc_ecc_import_unsigned(key, pub, pub + modSz, NULL, curve_id); + if (ret != 0) { + return ret; + } + XMEMCPY(key->dhuk_wrapped_priv, wrapped, wrappedLen); + key->dhuk_wrapped_priv_len = wrappedLen; + XMEMCPY(key->ccb_iv, iv, sizeof(key->ccb_iv)); + XMEMCPY(key->ccb_tag, tag, sizeof(key->ccb_tag)); + key->dhuk_is_ccb = 1; + /* Clear any DHUK seed-import state left from a prior import; the CCB + * path keys off the wrapped blob + iv/tag, not the seed. */ + ForceZero(key->dhuk_seed, sizeof(key->dhuk_seed)); + key->dhuk_seed_sz = 0; + key->dhuk_plain_priv_len = 0; + return 0; +} + +/* Crypto-callback keygen handler (WC_PK_TYPE_EC_KEYGEN). 
Provisions a fresh + * device-protected key: generate a scalar, have the CCB wrap it into a + * device-bound blob and derive its public key, and store both on the ecc_key + * (the scalar is zeroized and never leaves this call / the hardware). The + * scalar is generated in software with the supplied rng on a throwaway key with + * no devId (so no callback recursion). Returns CRYPTOCB_UNAVAILABLE for curves + * the CCB cannot wrap so keygen falls back to software. Not a public entry + * point -- reached via wc_ecc_make_key() on a WC_DHUK_DEVID key. */ +int wc_ecc_dev_make_key(WC_RNG* rng, int keysize, ecc_key* key, int curve_id) +{ +#ifdef WOLFSSL_SMALL_STACK + ecc_key* tmp = NULL; /* ecc_key is ~1-2KB -- heap it on the + * small-stack embedded targets this port + * runs on (repo convention). */ +#else + ecc_key tmp[1]; +#endif +#ifdef WOLFSSL_SMALL_STACK + /* d || pub || wrapped in one heap scratch buffer (~326 B) -- keep the + * fixed byte buffers off the stack too, like tmp above. */ + byte* scratch = NULL; + byte* d; + byte* pub; /* qx || qy, contiguous */ + byte* wrapped; +#else + byte d[MAX_ECC_BYTES]; + byte pub[2 * MAX_ECC_BYTES]; /* qx || qy, contiguous */ + byte wrapped[96]; +#endif + byte iv[16]; + byte tag[16]; + word32 dLen; + word32 wrappedSz = 0; + int modSz; + int ret; + int tmpInit = 0; + + (void)keysize; + if (rng == NULL || key == NULL) { + return BAD_FUNC_ARG; + } + /* wc_ecc_set_curve() ran before the callback, so key->dp is resolved even + * when curve_id came in as the default. 
*/ + if (curve_id <= 0 && key->dp != NULL) { + curve_id = key->dp->id; + } + modSz = wc_ecc_get_curve_size_from_id(curve_id); + if (modSz <= 0 || (word32)modSz > MAX_ECC_BYTES) { + return CRYPTOCB_UNAVAILABLE; /* unsupported -> software keygen */ + } + +#ifdef WOLFSSL_SMALL_STACK + tmp = (ecc_key*)XMALLOC(sizeof(ecc_key), key->heap, DYNAMIC_TYPE_ECC); + if (tmp == NULL) { + return MEMORY_E; + } + scratch = (byte*)XMALLOC((3 * MAX_ECC_BYTES) + 96, key->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (scratch == NULL) { + XFREE(tmp, key->heap, DYNAMIC_TYPE_ECC); + return MEMORY_E; + } + d = scratch; + pub = scratch + MAX_ECC_BYTES; + wrapped = scratch + (3 * MAX_ECC_BYTES); +#endif + + ret = wc_ecc_init_ex(tmp, key->heap, INVALID_DEVID); + if (ret == 0) { + tmpInit = 1; + ret = wc_ecc_make_key_ex(rng, modSz, tmp, curve_id); + } + if (ret == 0) { + dLen = (word32)modSz; + ret = wc_ecc_export_private_only(tmp, d, &dLen); + } + if (ret == 0) { + ret = wc_Stm32_Ccb_EccMakeBlob(curve_id, d, dLen, iv, tag, wrapped, + &wrappedSz, pub, pub + modSz); + /* Only an unsupported-curve / bad-arg result falls back to a software + * key (CRYPTOCB_UNAVAILABLE -> wc_ecc_make_key SW path). A real + * provisioning hardware fault (e.g. WC_TIMEOUT_E / WC_HW_E) is surfaced + * so a key the caller bound to the device is never silently downgraded + * to a plaintext software key. 
*/ + if (ret == WC_NO_ERR_TRACE(NOT_COMPILED_IN) || + ret == WC_NO_ERR_TRACE(BAD_FUNC_ARG)) { + ret = CRYPTOCB_UNAVAILABLE; + } + } + if (ret == 0) { + ret = wc_ecc_import_wrapped_private_ex(key, curve_id, wrapped, wrappedSz, + iv, (word32)sizeof(iv), tag, + (word32)sizeof(tag), pub, + (word32)(2 * modSz)); + } + + ForceZero(d, MAX_ECC_BYTES); + if (tmpInit) { + wc_ecc_free(tmp); + } +#ifdef WOLFSSL_SMALL_STACK + XFREE(scratch, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + XFREE(tmp, key->heap, DYNAMIC_TYPE_ECC); +#endif + return ret; +} +#endif /* WOLFSSL_DHUK && WOLFSSL_STM32_CCB */ + #endif /* !HAVE_ECC_SIGN */ #ifdef WOLFSSL_CUSTOM_CURVES @@ -8161,6 +8388,15 @@ int wc_ecc_free(ecc_key* key) wc_MAXQ10XX_EccFree(key); #endif +#ifdef WOLFSSL_DHUK + /* Scrub the DHUK derivation seed and wrapped scalar (both secret). */ + ForceZero(key->dhuk_seed, sizeof(key->dhuk_seed)); + ForceZero(key->dhuk_wrapped_priv, sizeof(key->dhuk_wrapped_priv)); + key->dhuk_seed_sz = 0; + key->dhuk_wrapped_priv_len = 0; + key->dhuk_plain_priv_len = 0; +#endif + mp_clear(key->pubkey.x); mp_clear(key->pubkey.y); mp_clear(key->pubkey.z); @@ -8892,7 +9128,7 @@ int wc_ecc_verify_hash(const byte* sig, word32 siglen, const byte* hash, #ifndef WOLF_CRYPTO_CB_ONLY_ECC -#if !defined(WOLFSSL_STM32_PKA) && \ +#if (!defined(WOLFSSL_STM32_PKA) || defined(WC_STM32_PKA_SIGN_ONLY)) && \ !defined(WOLFSSL_PSOC6_CRYPTO) && \ !defined(WOLF_CRYPTO_CB_ONLY_ECC) static int wc_ecc_check_r_s_range(ecc_key* key, mp_int* r, mp_int* s) @@ -9410,9 +9646,11 @@ static int ecc_verify_hash(mp_int *r, mp_int *s, const byte* hash, int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash, word32 hashlen, int* res, ecc_key* key) { -#if defined(WOLFSSL_STM32_PKA) +#if defined(WOLFSSL_STM32_PKA) && !defined(WC_STM32_PKA_SIGN_ONLY) /* HW ECDSA verify via STM32 PKA. Works under both the CubeMX-HAL - * and the bare-metal direct-register paths. Mirror the non-FIPS + * and the bare-metal direct-register paths. 
(Under WC_STM32_PKA_SIGN_ONLY + * -- e.g. STM32C5 -- verify falls through to the software body below.) + * Mirror the non-FIPS * input-validation from the SW body below (length range, all-zero * digest rejection) so HW + SW share the same input contract. */ #ifndef WC_ALLOW_ECC_ZERO_HASH diff --git a/wolfcrypt/src/port/st/README.md b/wolfcrypt/src/port/st/README.md index 74a93ced00e..d83c1001a37 100644 --- a/wolfcrypt/src/port/st/README.md +++ b/wolfcrypt/src/port/st/README.md @@ -22,7 +22,7 @@ Support for STM32 on-chip crypto hardware acceleration across the following fami | `WOLFSSL_STM32WBA` | WBA52 (TinyAES / HASH / RNG / SAES / V2 PKA / DHUK) | | `WOLFSSL_STM32WL` | WL55 (TinyAES / RNG / V1 PKA) | | `WOLFSSL_STM32C0` | C0xx (not yet supported in settings.h; SW only) | -| `WOLFSSL_STM32C5` | C5xx (TinyAES / HASH / RNG / SAES / V2 PKA / DHUK) | +| `WOLFSSL_STM32C5` | C5xx (TinyAES / HASH / RNG / SAES / V2 PKA sign-only / DHUK / CCB) | | `WOLFSSL_STM32N6` | N6xx (TinyAES / HASH / RNG / SAES / V2 PKA / DHUK; M55 core) | | `WOLFSSL_STM32MP13` | MP13 (CRYP / HASH / RNG / PKA; Cortex-A7) | | `WOLFSSL_STM32MP25` | MP25 (not yet supported in settings.h; Cortex-A35 + M33) | @@ -31,12 +31,27 @@ The port supports three integration flavors: - **CubeMX HAL** (`WOLFSSL_STM32_CUBEMX`) -- recommended for most projects. Pairs with ST's CubeMX-generated HAL drivers. This is the legacy default and what STM32 forum tutorials describe. - **Standard Peripheral Library** (no `WOLFSSL_STM32_CUBEMX`) -- legacy StdPeriLib path, kept for older F1/F2/F4 projects that have not migrated. -- **BARE-metal** (`WOLFSSL_STM32_BARE`) -- direct-register access with zero HAL or StdPeriLib dependency. Designed for wolfBoot / no-OS / FreeRTOS / TrustZone-NS workloads where pulling in the HAL is undesirable. See [wolfssl-examples-stm32/STM32_Bare_Test](https://github.com/wolfSSL/wolfssl-examples-stm32) for a 27-board reference matrix. 
+- **BARE-metal** (`WOLFSSL_STM32_BARE`) -- direct-register access with zero HAL or StdPeriLib dependency. Designed for wolfBoot / no-OS / FreeRTOS / TrustZone-NS workloads where pulling in the HAL is undesirable. See [wolfssl-examples-stm32/STM32_Bare_Test](https://github.com/wolfSSL/wolfssl-examples-stm32) for the NUCLEO board reference matrix. Support for the STSAFE-A secure element family via I2C is documented separately below. For details see our [wolfSSL ST](https://www.wolfssl.com/docs/stm32/) page. +### Examples and on-target tests + +The companion [wolfssl-examples-stm32](https://github.com/wolfSSL/wolfssl-examples-stm32) repo (added in PR [#13](https://github.com/wolfSSL/wolfssl-examples-stm32/pull/13)) provides the `STM32_Bare_Test` harness: a Makefile build over the NUCLEO board matrix with per-feature targets. Build/flash with `make BOARD=<board> CONFIG=bare TARGET=<target>` (and `flash`). Each target maps to one worked example (sources are under `STM32_Bare_Test/src/` in that repo / PR #13): + +| `TARGET=` | Source | Exercises | +|-----------|--------|-----------| +| `test` | `STM32_Bare_Test/src/main_test.c` | wolfCrypt KAT + full `wolfcrypt_test` suite | +| `bench` | `STM32_Bare_Test/src/main_bench.c` | wolfCrypt benchmark | +| `dhuk` | `STM32_Bare_Test/src/main_dhuk.c` | transparent DHUK GMAC / AES-ECB / ECDSA + `wc_ecc_import_wrapped_private` validation | +| `ccb` | `STM32_Bare_Test/src/main_ccb.c` | transparent CCB ECDSA (make_key/sign/verify) + `wc_ecc_import_wrapped_private_ex` validation | +| `ccbhal` | `STM32_Bare_Test/src/main_ccbhal.c` | CubeMX `HAL_CCB_*` CCB reference flow | +| `stsaes` | `STM32_Bare_Test/src/main_stsaes.c` | ST HAL SAES wrapped-key (DHUK) flow | +| `c5sign` | `STM32_Bare_Test/src/main_c5sign.c` | STM32C5 bare PKA protected-ECDSA-sign probe (vs ST CAVP vector) | +| `c5rng` | `STM32_Bare_Test/src/main_c5rng.c` | STM32C5 HW-RNG conditioning probe | + ## STM32 Symmetric Acceleration @@ -62,6 +77,8 @@ You can selectively disable parts of 
the HW acceleration: If your chip simply does not have an IP block (e.g. H7Ax has no CRYP/HASH; F207 has no CRYP/HASH) the family arm sets the appropriate `NO_STM32_*` defines for you. +The TinyAES IP exposes a single key-size bit (128/256 only), so wolfSSL auto-defines `NO_AES_192` on those families (C5/H5/G4/G0/U0/L4/L5/U3/U5/WB/WBA/WL). AES-192 is therefore unavailable under HW crypto on these parts (there is no software fallback when `NO_STM32_CRYPTO` is not set); the CRYP-IP families are unaffected. + ### SAES instance routing Some newer families (H5/H7S/U3/U5/WBA/C5/N6, plus the L562 sub-variant) expose a Secure AES (SAES) instance in addition to (or instead of) a regular AES block. Define `WOLFSSL_STM32_USE_SAES` to route all wolfcrypt AES traffic through SAES via the `WC_STM32_AES_INST` indirection macro. This is required when the regular AES block is TrustZone-gated (H7S3) and is also a prerequisite for DHUK key-wrap on the families in the `WC_STM32_HAS_DHUK` gate (U3/U5/H5/WBA/C5). @@ -72,7 +89,12 @@ Include `` before any other wolfSSL headers. If bu ### Benchmarks -See our [benchmarks](https://www.wolfssl.com/docs/benchmarks/) page for canonical numbers. For per-silicon BARE-vs-CubeMX comparisons across the current 27-board NUCLEO matrix, see the bench tables in [wolfssl-examples-stm32/STM32_Bare_Test/README.md](https://github.com/wolfSSL/wolfssl-examples-stm32). +See our [benchmarks](https://www.wolfssl.com/docs/benchmarks/) page for canonical numbers. For per-silicon BARE-vs-CubeMX comparisons across the NUCLEO board matrix, see the bench tables in the `STM32_Bare_Test/README.md` of the examples repo (PR #13). + + +## STM32 RNG + +The direct-register RNG path (`WOLFSSL_STM32F427_RNG` / `WOLFSSL_STM32_RNG_NOLIB` / `STM32_NUTTX_RNG`, and the new BARE/C5 port) shares one `wc_GenerateSeed` implementation. 
As of this port it does a bounded per-word poll plus a seed/clock-error (`SEIS`/`CEIS`) recover-and-retry loop -- clear the status, toggle `RNGEN`, discard up to four stale `DR` words, and retry up to a fixed budget -- instead of the previous immediate `SECS`/`CECS` hard-fail. It still fails closed (`RNG_FAILURE_E`) once the retry budget is exhausted and only emits a word when `DRDY` is set with no error latched. Net effect for existing F4/F7/F427/NuttX users: a transient seed/clock error is now recovered rather than reported on the first occurrence. The behavior is shared across all direct-register families; if you require the legacy immediate fast-fail, gate accordingly in `user_settings.h`. ## STM32 PKA (Public Key Acceleration) @@ -93,7 +115,118 @@ The STM32 PKA peripheral accelerates ECC scalar multiplication and ECDSA sign/ve - On V1 PKA chips the PKA peripheral runs a PKA-RAM clear on first clock-enable and silently rejects `CR.EN` writes during the clear. The wolfcrypt init mirrors HAL's behavior by spinning on `CR.EN` readback up to `WC_STM32_PKA_INIT_TIMEOUT` iterations. This was discovered during L5 bring-up but benefits every PKA chip. - On V2 PKA the `coefB` parameter must be loaded explicitly (V1 hardware can derive it from the prime). The V2 ECC scalar-multiplication path in `HAL_PKA_ECCMul()` and the ECDSA sign/verify paths both populate it -- see `wolfcrypt/src/port/st/stm32.c`. -- BARE-metal V2 PKA ECDSA sign/verify is work-in-progress -- the single-curve P-256 path is functional but multi-curve sweeps in `wolfcrypt_test` hit a -248 result on some boards. Track this in the wolfssl-examples-stm32 STM32_Bare_Test/README. 
+- Some parts expose only one ECDSA direction in hardware, and wolfSSL auto-gates accordingly (see `wolfssl/wolfcrypt/port/st/stm32.h`): STM32H563 has a "light" PKA that does ECDSA **verify** only (`WC_STM32_PKA_VERIFY_ONLY` is auto-defined, so `wc_ecc_sign_hash` routes to software while `wc_ecc_verify_hash` stays on the HW PKA), and STM32C5 currently does protected ECDSA **sign** only (`WC_STM32_PKA_SIGN_ONLY`; HW verify has an unresolved wolfSSL-context failure, so `wc_ecc_verify_hash` routes to software). STM32H573 keeps full sign + verify. Define either macro yourself to force the split on any other part. +- For PKA / SAES bring-up debugging, define `WC_STM32_PKA_DIAG` (printf on PKA init / op timeout / `OUT_ERROR`) or `WC_STM32_SAES_DIAG` (printf on AES/SAES `CCF` poll timeout). Both are dead code by default and add nothing to a normal build; they are the diagnostics that root-caused the V2 PKA driver bugs above. +- BARE-metal V2 PKA ECDSA sign and verify are validated on STM32U585, WBA52, H7S3, N657, and U385: full `wolfcrypt_test` ECC PASS including the multi-curve sweep. A prior `-248` on the multi-curve sweep was a stack of five BARE V2 driver bugs (PKA `SR_INITOK` wait, V2 `B_COEFF` load, V2 success code, `MOD_NB_BITS` slot reuse, and `coefSign` default) and is resolved. On STM32C5 the standalone HW PKA is held off in the default board configuration pending an open RNG NIST-init silicon issue (V2 PKA `INITOK` gates on `SR.RNGOKF`), so C5 ECDSA verify falls back to software; the C5 CCB-protected sign path (which drives the PKA through the CCB) is validated -- see the CCB section. + + +## STM32 DHUK (Device Hardware Unique Key) + +Newer STM32 silicon (U3/U5/WBA/H5/C5/N6; the `WC_STM32_HAS_DHUK` family gate) carries a chip-unique 256-bit key (DHUK) burned into the SAES key-derivation path. 
wolfSSL exposes it through the standard crypto-callback (`WOLF_CRYPTO_CB`) framework: register the STM32 DHUK device once, init a normal `Aes` / `ecc_key` with its `devId`, then perform NORMAL wolfCrypt operations (AES, AES-GCM/GMAC, ECDSA sign) transparently. There is no separate DHUK module -- the STM32 crypto callback lives in `wolfcrypt/src/port/st/stm32.c`. + +A DHUK-protected key is driven by a per-key 256-bit seed. The SAES derives the device-bound working key from (seed, DHUK) inside the hardware; for symmetric operations the derived key never appears in software. For ECDSA the derived key decrypts a wrapped private scalar into a short-lived buffer only. + +### Enabling + +``` +#define WOLFSSL_DHUK /* enable DHUK */ +#define WOLF_CRYPTO_CB /* required -- DHUK routes through crypto callbacks */ +``` + +`WC_STM32_HAS_DHUK` is auto-defined for the SAES+DHUK families when `WOLFSSL_DHUK` is set; other families compile out the DHUK code. `WOLFSSL_STM32_BARE` selects the bare-metal SAES backend. + +### Migration from WOLFSSL_STM32U5_DHUK + +`WOLFSSL_STM32U5_DHUK` is now an alias for this `WOLFSSL_DHUK` crypto-callback model and requires `WOLF_CRYPTO_CB` (a `#error` fires otherwise). The previous experimental inline path -- wrapped-key AES handled directly inside `wc_AesEncrypt` / `wc_AesDecrypt` / `wc_AesCbcEncrypt` / `wc_AesCbcDecrypt`, plus `wc_Stm32_Aes_SetDHUK_IV()`, `wc_Stm32_Aes_UnWrap()`, and the `Aes.dhukIV` / `dhukIVLen` members -- has been removed (fail-loud: code referencing those symbols no longer compiles). Migrate to the devId model shown below: register the device, init with `WC_DHUK_DEVID`, and use the normal `wc_Aes*` / `wc_ecc_*` APIs. Note that transparent DHUK AES/GMAC is bare-only (`WOLFSSL_STM32_BARE`); on the CubeMX/HAL path the crypto callback covers CCB ECDSA sign/keygen only. 
+ +### API + +```c +/* one-time: register the STM32 DHUK crypto-callback device */ +wc_Stm32_DhukRegister(WC_DHUK_DEVID); + +/* AES / GMAC: enable via devId at init, then pass the 256-bit seed as the key */ +Aes aes; +wc_AesInit(&aes, NULL, WC_DHUK_DEVID); +wc_AesGcmSetKey(&aes, seed, 32); +wc_AesGcmEncrypt(&aes, NULL, NULL, 0, iv, ivSz, tag, tagSz, aad, aadSz); /* GMAC */ +wc_AesFree(&aes); + +wc_Stm32_DhukUnRegister(WC_DHUK_DEVID); +``` + +ECDSA mirrors this: init the key with `wc_ecc_init_ex(&key, NULL, WC_DHUK_DEVID)`, import the wrapped private scalar plus its derivation seed with `wc_ecc_import_wrapped_private(&key, seed, seedSz, wrapped, wrappedLen, plainLen)`, then call the normal `wc_ecc_sign_hash()`; verification uses the in-clear public key unchanged. The seed reaches the device as the AES key bytes (`aes->devKey`, set by the normal `wc_AesSetKey` / `wc_AesGcmSetKey`) or, for ECC, on the `ecc_key`; the STM32 callback reads it and derives the working key inside SAES. + +Worked example: `STM32_Bare_Test/src/main_dhuk.c` (examples repo, PR #13) drives `wc_Stm32_DhukRegister` through transparent GMAC, AES-ECB, and ECDSA, and exercises the `wc_ecc_import_wrapped_private` argument validation in its `test_ecc_dhuk_setter()` block. + +### Provisioning helper + +`wc_Stm32_Aes_Wrap()` performs a chip-bound DHUK wrap (KEYSEL=HW, deterministic output) and is retained for provisioning wrapped key material. `WOLFSSL_DHUK_DEVID` (808) / `WOLFSSL_SAES_DEVID` (807) select its wrap-key source. + +### Current state + +- Validated on STM32U385 (TZEN=0): transparent GMAC, AES-ECB, and ECDSA sign all run through the crypto-callback path; the derived key is deterministic, AES round-trips, and ECDSA verifies with the public counterpart. +- The SAES key-derivation/unwrap passes complete via `SR.BUSY` clearing plus `SR.KEYVALID`, NOT via `CCF` (which is only raised for data-output passes). 
Waiting on `CCF` for the key path was the original `WC_TIMEOUT_E`; the BUSY/KEYVALID completion is the fix. +- DHUK is validated in non-secure state (TZEN=0). Operation under TZEN=1 secure state (which adds SAES RNG / GTZC interactions) is not yet characterized; DHUK does not otherwise require secure state. + +### Optional exact-key import (off by default) + +`wc_Stm32_Aes_DhukOp[_ex]()` unwraps a previously DHUK-wrapped key into SAES KEYR and runs AES ECB/CBC with it (importing an externally-chosen key, vs deriving one from a seed). It is an optional/experimental path: compiled only with `WOLFSSL_STM32_DHUK_UNWRAP` and called explicitly (not auto-routed through the crypto callback). + + +## STM32 CCB (Coupling and Chaining Bridge) + +STM32U3 and STM32C5 silicon (e.g. U385 / C5A3; RM0487 ch 31 and RM0522, the `WC_STM32_HAS_CCB` gate) carry the CCB peripheral, which chains the PKA, SAES and RNG over a private local interconnect. This lets a DHUK-protected ECDSA private scalar be unwrapped by the SAES and consumed by the PKA entirely in hardware -- the scalar never crosses the system bus or enters software, not even into a short-lived buffer (unlike the generic DHUK ECDSA path above, which decrypts the scalar into a stack buffer). CCB builds on DHUK: the private key is held as a chip-bound AES-GCM blob (`iv` / `tag` / wrapped scalar) created under the silicon DHUK. + +CCB is supported on both build paths: the bare-metal direct-register OPSTEP driver (`WOLFSSL_STM32_BARE`) and the CubeMX/HAL path (`WOLFSSL_STM32_CUBEMX`, via ST's `HAL_CCB_*` driver). It currently covers ECDSA over P-256. 
+ +### Enabling + +``` +#define WOLFSSL_DHUK /* CCB is a DHUK feature */ +#define WOLF_CRYPTO_CB /* required -- transparent sign routes through crypto callbacks */ +#define WOLFSSL_STM32_CCB /* opt in to the CCB-protected ECDSA path */ +``` + +`WOLFSSL_STM32_CCB` requires CCB silicon (`WOLFSSL_STM32U3` or `WOLFSSL_STM32C5`) and either `WOLFSSL_STM32_BARE` or `WOLFSSL_STM32_CUBEMX` (a `#error` fires otherwise). + +### API + +The whole flow uses the **standard ECC API** -- there is no CCB-specific public API. Binding the key to `WC_DHUK_DEVID` routes keygen and sign through the STM32 crypto callback, which provisions and uses the CCB-protected key transparently (a drop-in for TLS and other consumers). The same flow works on both build paths. + +```c +ecc_key key; + +/* one-time: register the STM32 DHUK/CCB crypto-callback device */ +wc_Stm32_DhukRegister(WC_DHUK_DEVID); + +wc_ecc_init_ex(&key, NULL, WC_DHUK_DEVID); + +/* provision a fresh device-bound key with the STANDARD keygen -- the crypto + * callback intercepts it: the CCB generates the scalar, wraps it into a blob + * and derives the public key, all in hardware. No CCB-specific API. */ +wc_ecc_make_key_ex(&rng, 32, &key, ECC_SECP256R1); + +/* transparent sign -- the scalar is unwrapped SAES->PKA in HW and signed */ +wc_ecc_sign_hash(hash, hashLen, sig, &sigLen, &rng, &key); + +/* verify with the in-clear public key, unchanged */ +wc_ecc_verify_hash(sig, sigLen, hash, hashLen, &verified, &key); + +wc_ecc_free(&key); +wc_Stm32_DhukUnRegister(WC_DHUK_DEVID); +``` + +To reuse a key across resets, persist the blob from a provisioned key and reload it later with `wc_ecc_import_wrapped_private_ex(&key, curve_id, wrapped, wrappedLen, iv, ivLen, tag, tagLen, pub, pubLen)` (the public key in uncompressed `qx||qy` form), then sign as above. Both paths set `key->dhuk_is_ccb` and the device `devId`, so dispatch to the CCB happens automatically inside the crypto callback. 
+ +Worked example: `STM32_Bare_Test/src/main_ccb.c` (examples repo, PR #13) runs the full `wc_ecc_make_key` -> `wc_ecc_sign_hash` -> `wc_ecc_verify_hash` flow and exercises the `wc_ecc_import_wrapped_private_ex` argument validation in its `ccb_import_arg_checks()` block. + +### Current state + +- Validated on STM32U385 (NUCLEO-U385RG-Q, TZEN=0), P-256, on both the bare-metal and CubeMX/HAL build paths: `wc_ecc_make_key` -> `wc_ecc_sign_hash` -> `wc_ecc_verify_hash` round-trips, with the private scalar never present in software. +- Also validated on STM32C5 (NUCLEO-C5A3ZG, TZEN=0), P-256, bare-metal: the same `wc_ecc_make_key` -> `wc_ecc_sign_hash` -> `wc_ecc_verify_hash` flow plus a persisted-blob re-import (`wc_ecc_import_wrapped_private_ex`) round-trip, all on the CCB hardware. On STM32C5 the blob-create step is a combined create-and-sign: the C5 OPSTEP machine only advances through the GCM-final phase when the random k is drawn and the PKA sign is started during creation (the r,s are a by-product and discarded). That extra sequence is gated by `WOLFSSL_STM32C5` in the bare driver; the U3 OPSTEP machine does not require it. +- `Stm32Ccb_Init()` pulse-resets the PKA / SAES / RNG before each operation, so the first CCB op is robust even when prior standalone crypto (RNG seeding, ECC keygen) left an engine in a state that would otherwise stall the CCB's chained SAES GCM step. The family-specific reset register name is abstracted (`WC_STM32_CCB_RSTR`). +- CCB requires the U3 / C5 at its full clock; the reference clock-tree bring-up is in the bare example's `boards/u3/hw_init.c` (96 MHz) and `boards/c5a3/hw_init.c`. ## STM32 BARE-metal port @@ -124,7 +257,7 @@ In return wolfcrypt drives the IP-block registers directly. Family-specific arms ### Per-family HW IP coverage (BARE-metal validation matrix) -The following table summarizes which IP blocks the BARE path drives on each family currently in the validation matrix. 
`-` means the silicon does not carry the IP; the corresponding wolfcrypt algorithm falls back to software. +The following table summarizes which IP blocks the BARE path drives on each family currently in the validation matrix. `-` means the silicon does not carry the IP; the corresponding wolfcrypt algorithm falls back to software. (This is the per-chip BARE-silicon validation view; the family-flag table at the top of this document is the broader "what each family supports across all three flavors" view -- the two overlap by design.) | Family | Chip example | AES | HASH | RNG | PKA | SAES | DHUK | |----------|------------------|------------|------|------|-----|------|------| diff --git a/wolfcrypt/src/port/st/stm32.c b/wolfcrypt/src/port/st/stm32.c index 5a7dffc9e15..e22d60624de 100644 --- a/wolfcrypt/src/port/st/stm32.c +++ b/wolfcrypt/src/port/st/stm32.c @@ -31,6 +31,12 @@ #include #include #include +#ifdef WOLFSSL_DHUK + #include + #ifdef HAVE_ECC + #include + #endif +#endif #ifdef NO_INLINE #include @@ -269,10 +275,14 @@ extern PKA_HandleTypeDef hpka; #define WC_STM32_PKA_OK_CODE 0UL #endif -/* Number of word slots in the PKA RAM array (per the CMSIS device - * header; e.g. 894 on WB55 V1). */ +/* Number of 32-bit word slots in the PKA RAM (e.g. 894 on WB55 V1). + * Computed from the RAM byte size / 4 rather than the element count: + * most CMSIS headers type RAM as uint32_t[], but the STM32C5 header + * types it as uint8_t[5336], so element-count would yield bytes. The + * PKA RAM is word-addressed on every part, so byte-size/4 is correct + * for both. */ #define WC_STM32_PKA_RAM_WORDS \ - (sizeof(((PKA_TypeDef*)0)->RAM) / sizeof(((PKA_TypeDef*)0)->RAM[0])) + (sizeof(((PKA_TypeDef*)0)->RAM) / 4U) /* Big-endian byte buffer -> PKA RAM (little-endian word order). 
The * destination is the PKA RAM slot indexed by 'word_idx'; n is the byte @@ -343,7 +353,10 @@ static volatile uint32_t* wc_stm32_pka_prep_ram(PKA_HandleTypeDef* hpkah) #endif return NULL; } - return hpkah->Instance->RAM; + /* Cast to word pointer: the STM32C5 CMSIS types PKA RAM as uint8_t[], + * others as uint32_t[]. Callers word-index the returned pointer, which + * the PKA RAM requires (byte accesses bus-fault). */ + return (volatile uint32_t*)(void*)hpkah->Instance->RAM; } /* PKA RAM (little-endian word order) -> big-endian byte buffer. */ @@ -489,10 +502,14 @@ static HAL_StatusTypeDef wc_stm32_pka_ensure_init(PKA_HandleTypeDef *hpkah) static void HAL_PKA_RAMReset(PKA_HandleTypeDef *hpkah) { + volatile uint32_t* ram; uint32_t i; if (hpkah == NULL || hpkah->Instance == NULL) return; + /* Word-addressed: index a uint32_t view, not the CMSIS RAM[] element + * type (uint8_t[] on STM32C5 -> byte stores, which bus-fault). */ + ram = (volatile uint32_t*)(void*)hpkah->Instance->RAM; for (i = 0; i < WC_STM32_PKA_RAM_WORDS; i++) { - hpkah->Instance->RAM[i] = 0UL; + ram[i] = 0UL; } } @@ -593,6 +610,49 @@ static HAL_StatusTypeDef wc_stm32_pka_process(PKA_HandleTypeDef *hpkah, return HAL_OK; } +#ifdef WOLFSSL_STM32C5 +/* The STM32C5 PKA implements only the side-channel-PROTECTED ECDSA SIGN + * (mode 0x24); the plain ECDSA SIGN does not exist. The protected engine + * requires the operating MODE written to PKA_CR BEFORE the operands are loaded + * into PKA RAM, on a freshly-erased RAM. The standard V2 flow writes the mode + * AFTER the operands (correct on U3/U5/N6) and on the C5 yields an operation + * that completes cleanly (PROCENDF, OUT_ERROR=OK) but returns a WRONG r,s. + * Arm the mode up front here, on a fresh RAM erase (disable then re-enable -> + * INITOK). The HW RNG is intentionally left running -- the protected sign + * chains with it for side-channel blinding (RM0522 Table 241) and signs + * correctly with it enabled. 
This is for the SIGN only: ECDSA VERIFY (0x26) is + * a plain, non-protected public operation and runs the standard V2 path. */ +static HAL_StatusTypeDef wc_stm32_pka_arm_mode(PKA_HandleTypeDef *hpkah, + uint32_t mode) +{ + PKA_TypeDef *p; + uint32_t cr, t; + + if (hpkah == NULL || hpkah->Instance == NULL) { + return HAL_ERROR; + } + p = hpkah->Instance; + + /* Fresh PKA RAM erase: disable then re-enable, wait for INITOK. */ + p->CR = 0U; + __DMB(); + p->CR = PKA_CR_EN; + t = 0; + while ((p->SR & PKA_SR_INITOK) == 0U) { + if (++t >= WC_STM32_PKA_TIMEOUT_LOOPS) { + return HAL_TIMEOUT; + } + } + /* Write the operating mode before the operands are loaded. */ + cr = p->CR; + cr &= ~PKA_CR_MODE; + cr |= (mode << PKA_CR_MODE_Pos) & PKA_CR_MODE; + p->CR = cr; + __DMB(); + return HAL_OK; +} +#endif /* WOLFSSL_STM32C5 */ + static HAL_StatusTypeDef HAL_PKA_ECCMul(PKA_HandleTypeDef *hpkah, PKA_ECCMulInTypeDef *in, uint32_t Timeout) { @@ -739,6 +799,14 @@ static HAL_StatusTypeDef HAL_PKA_ECDSASign(PKA_HandleTypeDef *hpkah, RAM = wc_stm32_pka_prep_ram(hpkah); if (RAM == NULL) return HAL_ERROR; +#ifdef WOLFSSL_STM32C5 + /* C5 protected sign (0x24): arm the mode on a fresh RAM erase before the + * operands are written (see wc_stm32_pka_arm_mode). */ + if (wc_stm32_pka_arm_mode(hpkah, PKA_MODE_ECDSA_SIGNATURE) != HAL_OK) { + return HAL_ERROR; + } +#endif + /* Capture sizes on the handle BEFORE the operation -- V2 PKA * clobbers RAM[MOD_NB_BITS] during compute. GetResult reads from * the handle on V2 (matches HAL behaviour). */ @@ -843,8 +911,6 @@ static void HAL_PKA_ECDSASign_GetResult(PKA_HandleTypeDef *hpkah, #ifdef STM32_HASH -/* #define DEBUG_STM32_HASH */ - #if defined(WOLFSSL_STM32_BARE) && !defined(WC_STM32_HASH_CLK_ENABLE) #error "WOLFSSL_STM32_BARE: HASH clock-enable not mapped for this STM32 \ family. 
Add WC_STM32_HASH_CLK_ENABLE() to \ @@ -2341,144 +2407,2257 @@ static void Stm32SaesEnsureRng(void) RNG->CR |= RNG_CR_RNGEN; __DMB(); } +#ifdef RCC_CR_SHSION + /* On STM32U5/U3 the SAES kernel clock is the SHSI (secure HSI). It must + * be running or the SAES IP never computes -- CCF never asserts and DHUK + * wrap/unwrap time out (the SAESSEL mux defaults to SHSI, so just enable + * SHSI and wait for ready). ST configures this in HAL_CRYP_MspInit; the + * bare-metal path has to do it here. */ + if ((RCC->CR & RCC_CR_SHSION) == 0U) { + int t = 0; + RCC->CR |= RCC_CR_SHSION; + while ((RCC->CR & RCC_CR_SHSIRDY) == 0U) { + if (++t >= STM32_BARE_SAES_TIMEOUT) { + break; + } + } + __DMB(); + } +#endif } #endif /* WOLFSSL_DHUK || WOLFSSL_STM32_USE_SAES */ +#if defined(WOLFSSL_DHUK) +/* BARE DHUK / SAES key wrap+unwrap. Mirrors STM32Cube U5 + * stm32u5xx_hal_cryp_ex.c. + * wc_Stm32_Aes_Wrap -- wrap a plain key for provisioning. + * wc_Stm32_Aes_DhukOp -- combined unwrap + ECB enc/dec using the + * wrapped key in aes->key (KMOD=WRAPPED, + * KEYSEL=HW). ECB only for now. */ + +/* The DHUK code calls Stm32AesPollCCF(SAES, STM32_BARE_SAES_TIMEOUT) and + * STM32_AES_CLEAR_INST(SAES) directly -- unified with the regular AES + * path; see the Stm32AesPollCCF / STM32_AES_CLEAR_INST definitions + * above. */ +#define Stm32SaesWaitCCF() Stm32AesPollCCF(SAES, STM32_BARE_SAES_TIMEOUT) +#define Stm32SaesClearCCF() STM32_AES_CLEAR_INST(SAES) + +/* Run one ECB block through SAES: push the 4-word input (DINR x4), wait + * for CCF, read the 4-word result (DOUTR x4) back into buf in place, then + * clear CCF. Returns the Stm32SaesWaitCCF() status; on timeout the DOUTR + * words are left unread and CCF is not cleared (the caller ForceZero's buf + * and bails). Centralizes the DINR / CCF / DOUTR idiom shared by the DHUK + * wrap, GMAC and ECB/CBC paths. 
*/ +static int Stm32SaesEcbBlock(word32 buf[4]) +{ + int ret; + SAES->DINR = buf[0]; + SAES->DINR = buf[1]; + SAES->DINR = buf[2]; + SAES->DINR = buf[3]; + ret = Stm32SaesWaitCCF(); + if (ret != 0) { + return ret; + } + buf[0] = SAES->DOUTR; + buf[1] = SAES->DOUTR; + buf[2] = SAES->DOUTR; + buf[3] = SAES->DOUTR; + Stm32SaesClearCCF(); + return ret; +} +/* Wrap an AES key via SAES. Wrap-key source selected by aes->devId: + * WOLFSSL_DHUK_DEVID -- KEYSEL=HW: encrypt under silicon DHUK + * (chip-bound blob); aes->key is ignored. + * anything else -- KEYSEL=NORMAL: encrypt under aes->key + * (loaded into KEYR). */ +int wc_Stm32_Aes_Wrap(struct Aes* aes, const byte* in, word32 inSz, + byte* out, word32* outSz, const byte* iv, int ivSz) +{ + int ret; + int useDhuk; + word32 cr; + word32 i; + word32 nWords; + word32 keyLen; + word32 buf[8]; /* up to 256-bit key */ -#elif defined(WOLFSSL_STM32_CUBEMX) + if (aes == NULL || in == NULL || out == NULL || outSz == NULL) { + return BAD_FUNC_ARG; + } + if (inSz != 16 && inSz != 32) { + return BAD_FUNC_ARG; + } + if (iv != NULL && ivSz != 16) { + return BAD_FUNC_ARG; + } -#if defined(WOLFSSL_DHUK) -/* Wrap an AES key using the DHUK */ -int wc_Stm32_Aes_Wrap(struct Aes* aes, const byte* in, word32 inSz, byte* out, - word32* outSz, const byte* iv, int ivSz) -{ - CRYP_HandleTypeDef hcryp; - int ret = 0; - byte key[AES_256_KEY_SIZE]; + useDhuk = (aes->devId == WOLFSSL_DHUK_DEVID); - /* SAES requires use of the RNG -- HAL_RNG_DeInit() calls from random.c - turn off the RNG clock -- re-enable the clock here */ - __HAL_RCC_RNG_CLK_ENABLE(); - ByteReverseWords((word32*)key, (word32*)in, inSz); - XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef)); - if (ret == 0) { - hcryp.Instance = SAES; - hcryp.Init.DataType = CRYP_DATATYPE_8B; - hcryp.Init.KeySize = CRYP_KEYSIZE_256B; - hcryp.Init.DataWidthUnit = CRYP_DATAWIDTHUNIT_BYTE; - hcryp.Init.KeySelect = CRYP_KEYSEL_HW; /* use DHUK to unwrap with use */ - hcryp.Init.KeyMode = CRYP_KEYMODE_WRAPPED; 
- if (iv != NULL) { - hcryp.Init.pInitVect = (uint32_t *)iv; - hcryp.Init.Algorithm = CRYP_AES_CBC; + /* KEYSIZE and the KEYR load describe the WRAPPING key, not the wrapped + * payload (inSz). Under KEYSEL = HW the wrapping key is the 256-bit DHUK; + * otherwise it is aes->key (aes->keylen bytes). */ + if (useDhuk) { + keyLen = 32; /* DHUK is 256-bit */ + } + else { + keyLen = aes->keylen; + if (keyLen != 16 && keyLen != 32) { + return BAD_FUNC_ARG; } - else { - hcryp.Init.Algorithm = CRYP_AES_ECB; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + /* RNG must be running before SAES clock-enable -- SAES self-init + * pulls entropy from the RNG. */ + Stm32SaesEnsureRng(); +#ifdef WC_STM32_SAES_CLK_ENABLE + WC_STM32_SAES_CLK_ENABLE(); +#endif + + /* Wait for SAES self-init (SR.BUSY) to clear before configuring. */ + ret = Stm32SaesWaitInit(); + if (ret != 0) { + wolfSSL_CryptHwMutexUnLock(); + return ret; + } + + /* Disable SAES before reconfiguring CR (per RM). Clear any stale + * CCF before we begin. */ + SAES->CR = 0; + Stm32SaesClearCCF(); + + /* CR: byte data type, KMOD = WRAPPED, MODE = ENCRYPT (= 0), + * CHMOD = ECB (default) or CBC (when IV given), KEYSIZE = 256 + * if 32-byte key. KEYSEL = HW (DHUK) under useDhuk, else NORMAL. */ + cr = AES_CR_DATATYPE_1; /* 0b10 -- byte */ + cr |= AES_CR_KMOD_0; /* KMOD = WRAPPED */ + if (useDhuk) { + cr |= AES_CR_KEYSEL_0; /* KEYSEL = HW (DHUK) */ + } + if (keyLen == 32) { + cr |= AES_CR_KEYSIZE; + } + if (iv != NULL) { + cr |= AES_CR_CHMOD_0; /* CHMOD = CBC */ + } + SAES->CR = cr; + + /* Load KEYR only for the software-key path. With KEYSEL = HW the + * IP reads DHUK directly and ignores KEYR. 
*/ + if (!useDhuk) { + ret = Stm32AesLoadKeyInst(SAES, (const word32*)aes->key, keyLen); + if (ret != 0) { + wolfSSL_CryptHwMutexUnLock(); + return ret; } - ret = HAL_CRYP_Init(&hcryp); } - if (ret == HAL_OK) { - ret = HAL_CRYPEx_WrapKey(&hcryp, (uint32_t*)key, (uint32_t*)out, 100); - HAL_CRYP_DeInit(&hcryp); + if (iv != NULL) { + /* Alignment-safe IV copy via local buffer (iv is a byte* and + * may not be 4-byte aligned). IVR{3..0} are written in the + * same word order the existing TinyAES Stm32AesLoadIv() helper + * uses (high-significance word -> IVR3, low -> IVR0), and the + * IV bytes are taken as-is to match the caller's convention. */ + word32 ivWords[4]; + XMEMCPY(ivWords, iv, 16); + SAES->IVR3 = ivWords[0]; + SAES->IVR2 = ivWords[1]; + SAES->IVR1 = ivWords[2]; + SAES->IVR0 = ivWords[3]; } - ForceZero(key, sizeof(key)); + (void)ivSz; - ByteReverseWords((word32*)out, (word32*)out, inSz); + /* Stage input. */ + XMEMCPY(buf, in, inSz); + + /* Enable SAES. */ + SAES->CR |= AES_CR_EN; + + /* Process one block (4 words) at a time. 128-bit key = 1 block, + * 256-bit key = 2 blocks. */ + nWords = inSz / 4u; + for (i = 0; i < nWords; i += 4u) { + ret = Stm32SaesEcbBlock(&buf[i]); + if (ret != 0) { + goto exit; + } + } + + SAES->CR &= ~AES_CR_EN; + + XMEMCPY(out, buf, inSz); *outSz = inSz; - (void)aes; + +exit: + ForceZero(buf, sizeof(buf)); + wolfSSL_CryptHwMutexUnLock(); return ret; } - -#endif - -int wc_Stm32_Aes_Init(Aes* aes, CRYP_HandleTypeDef* hcryp, int useSaes) +/* Combined DHUK unwrap + ECB encrypt or decrypt. The caller's aes + * struct holds the wrapped 256-bit key; SAES unwraps it under the + * silicon-bound DHUK and runs ECB enc/dec on the input blocks. + * + * Default-off: the unwrap-decrypt pass needs secure-state context + * (TZ-enabled build) -- on the silicon we have on hand (U3, WBA52, + * both TZEN=0 from factory) the wrapped-key DECRYPT hangs with + * SR.KEYVALID=1 but CCF never asserts. 
Wrap is silicon-validated + * deterministic chip-bound output via wc_Stm32_Aes_Wrap; DhukOp + * stays gated until a TZ-secure validation lands. Define + * WOLFSSL_STM32_DHUK_UNWRAP to opt into the experimental path. */ +#ifndef WOLFSSL_STM32_DHUK_UNWRAP +int wc_Stm32_Aes_DhukOp_ex(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc, int isCbc) +{ + (void)aes; (void)out; (void)in; (void)sz; (void)isEnc; (void)isCbc; + return CRYPTOCB_UNAVAILABLE; +} +#else +int wc_Stm32_Aes_DhukOp_ex(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc, int isCbc) { int ret; - word32 keySize; -#ifdef STM32_HW_CLOCK_AUTO - /* enable the peripheral clock */ - __HAL_RCC_CRYP_CLK_ENABLE(); + word32 cr; + word32 cr2; + word32 chmod; + word32 i; + word32 blocks; + word32 wrappedKey[8]; + byte prevCt[WC_AES_BLOCK_SIZE]; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + if (sz == 0 || (sz % WC_AES_BLOCK_SIZE) != 0) { + return BAD_FUNC_ARG; + } + /* DHUK is 256-bit only. */ + if (aes->keylen != 32) { + return BAD_FUNC_ARG; + } + chmod = isCbc ? STM32_AES_CHMOD_CBC : STM32_AES_CHMOD_ECB; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + Stm32SaesEnsureRng(); +#ifdef WC_STM32_SAES_CLK_ENABLE + WC_STM32_SAES_CLK_ENABLE(); #endif - ret = wc_AesGetKeySize(aes, &keySize); - if (ret != 0) + /* Wait for SAES self-init (SR.BUSY) before configuring. */ + ret = Stm32SaesWaitInit(); + if (ret != 0) { + wolfSSL_CryptHwMutexUnLock(); return ret; + } - XMEMSET(hcryp, 0, sizeof(CRYP_HandleTypeDef)); - switch (keySize) { - case 16: /* 128-bit key */ - hcryp->Init.KeySize = CRYP_KEYSIZE_128B; - break; - #ifdef CRYP_KEYSIZE_192B - case 24: /* 192-bit key */ - hcryp->Init.KeySize = CRYP_KEYSIZE_192B; - break; - #endif - case 32: /* 256-bit key */ - hcryp->Init.KeySize = CRYP_KEYSIZE_256B; - break; - default: - break; + /* Stage the wrapped key (256-bit) for DINR push. aes->key arrives + * byte-reversed (BARE convention). 
*/ + XMEMCPY(wrappedKey, aes->key, 32); + + /* Step 1: Unwrap. Mirrors HAL CRYPEx_KeyDecrypt verbatim, using + * MODIFY_REG-style writes that preserve EN across MODE transitions: + * + * (1a) CR = KMOD=WRAPPED + KEYSEL=HW + KEYSIZE=256 + CHMOD=ECB + * + DATATYPE=byte + MODE=KEYDERIVATION. EN=0 initially. + * (1b) Set EN. Wait CCF. Clear CCF. + * (2a) MODIFY MODE -> DECRYPT, keep EN set. + * (2b) Push 8 wrapped key words via DINR in 2 four-word blocks, + * wait CCF + clear CCF between blocks. The IP decrypts each + * block using DHUK and deposits the unwrapped key into KEYR. + * (2c) Clear EN. + * + * Earlier attempts that wrote CR=0 between phases (or that skipped + * the KEYDERIVATION pre-pass) timed out -- SR.KEYVALID asserts but + * CCF never fires. The HAL approach keeps EN set across MODE + * changes via MODIFY_REG. */ + Stm32SaesClearCCF(); + + /* Step 1a: full CR setup with MODE=KEYDERIVATION, EN=0. + * + * On U3 and WBA52 with TZEN=0 and KEYSEL=HW (DHUK), the + * KEYDERIVATION pass completes but the subsequent DECRYPT pass + * that deposits the unwrapped key into KEYR does not complete + * (CCF never asserts; SR.KEYVALID=1; no ISR error). Setting + * AES_CR_KEYPROT did not help. The wrapped-key-to-KEYR deposit + * appears to be a secure-state-only operation on this silicon + * even with TZEN=0. DHUK Wrap (encrypt-with-DHUK) is reachable + * from NS; DhukOp's unwrap-and-load is not. Documented; caller + * falls back. */ + cr = AES_CR_DATATYPE_1 | AES_CR_KEYSIZE | AES_CR_KMOD_0 | + AES_CR_KEYSEL_0 | /* KEYSEL = HW (DHUK) */ + AES_CR_MODE_0; /* MODE = KEYDERIVATION */ + SAES->CR = cr; + + /* Step 1b: enable, wait CCF for prep pass, clear CCF. */ + SAES->CR |= AES_CR_EN; + ret = Stm32SaesWaitCCF(); + if (ret != 0) { + goto exit; + } + Stm32SaesClearCCF(); + + /* Step 2a: switch MODE to DECRYPT via MODIFY_REG-style write. + * Read-modify-write preserves EN and all other bits. 
*/ + cr2 = SAES->CR; + cr2 = (cr2 & ~AES_CR_MODE) | AES_CR_MODE_1; /* DECRYPT */ + SAES->CR = cr2; + + /* Step 2b: push 8 wrapped-key words via DINR in 2 four-word + * blocks, wait CCF + clear between. No DOUTR read on unwrap -- + * the result is internally moved to KEYR. */ + for (i = 0; i < 8u; i += 4u) { + SAES->DINR = wrappedKey[i + 0u]; + SAES->DINR = wrappedKey[i + 1u]; + SAES->DINR = wrappedKey[i + 2u]; + SAES->DINR = wrappedKey[i + 3u]; + ret = Stm32SaesWaitCCF(); + if (ret != 0) { + goto exit; + } + Stm32SaesClearCCF(); } -#ifdef WOLFSSL_DHUK - /* Use hardware key */ - if (useSaes && (aes->devId == WOLFSSL_DHUK_DEVID || - aes->devId == WOLFSSL_SAES_DEVID)) { + /* Step 2c: disable EN. KEYR now holds the unwrapped key. */ + SAES->CR &= ~AES_CR_EN; + ForceZero(wrappedKey, sizeof(wrappedKey)); - /* SAES requires use of the RNG -- HAL_RNG_DeInit() calls from - random.c turn off the RNG clock -- re-enable the clock here */ - __HAL_RCC_RNG_CLK_ENABLE(); + /* Step 2: ECB/CBC with the unwrapped key now in KEYR. KMOD and + * KEYSEL go back to NORMAL; decrypt needs a key-derivation prep + * pass first (last-round-first schedule). CHMOD selects ECB/CBC. */ + cr = AES_CR_DATATYPE_1 | AES_CR_KEYSIZE | chmod; /* KMOD=0, KEYSEL=0 */ + if (!isEnc) { + SAES->CR = cr | AES_CR_MODE_0; /* MODE = KEYDERIVATION */ + SAES->CR |= AES_CR_EN; + ret = Stm32SaesWaitCCF(); + if (ret != 0) { + goto exit; + } + Stm32SaesClearCCF(); + SAES->CR &= ~AES_CR_EN; + cr |= AES_CR_MODE_1; /* MODE = DECRYPT */ + } + SAES->CR = cr; + + /* CBC: load IV from aes->reg into IVR3..IVR0 (MSB first) before + * setting EN. HW does IV chaining + update. SAES->IVRx direct + * because WC_STM32_AES_INST may resolve to AES on dual-IP chips. 
*/ + if (chmod == STM32_AES_CHMOD_CBC) { + word32 v[4]; + XMEMSET(v, 0, sizeof(v)); + XMEMCPY(v, aes->reg, WC_AES_BLOCK_SIZE); + ByteReverseWords(v, v, 16); + SAES->IVR3 = v[0]; + SAES->IVR2 = v[1]; + SAES->IVR1 = v[2]; + SAES->IVR0 = v[3]; + ForceZero(v, sizeof(v)); + } - hcryp->Instance = SAES; - hcryp->Init.DataType = CRYP_DATATYPE_8B; + /* CBC-decrypt: save last input block for next IV before in-place + * decrypt clobbers it. */ + if (chmod == STM32_AES_CHMOD_CBC && !isEnc) { + XMEMCPY(prevCt, in + sz - WC_AES_BLOCK_SIZE, WC_AES_BLOCK_SIZE); + } - /* Key select (HW, or Normal) */ - if (aes->devId == WOLFSSL_DHUK_DEVID) { - hcryp->Init.KeySelect = CRYP_KEYSEL_HW; - } - else { - hcryp->Init.KeySelect = CRYP_KEYSEL_NORMAL; - hcryp->Init.KeyMode = CRYP_KEYMODE_NORMAL; - hcryp->Init.pKey = (uint32_t*)aes->key; - } - } else -#endif - { - hcryp->Instance = CRYP; - hcryp->Init.DataType = CRYP_DATATYPE_8B; - hcryp->Init.pKey = (STM_CRYPT_TYPE*)aes->key; + SAES->CR |= AES_CR_EN; + + /* Process input blocks. */ + blocks = sz / WC_AES_BLOCK_SIZE; + for (i = 0; i < blocks; i++) { + word32 buf[4]; + word32 j; + XMEMCPY(buf, in + i * WC_AES_BLOCK_SIZE, WC_AES_BLOCK_SIZE); + for (j = 0; j < 4u; j++) { + SAES->DINR = buf[j]; + } + ret = Stm32SaesWaitCCF(); + if (ret != 0) { + goto exit; + } + for (j = 0; j < 4u; j++) { + buf[j] = SAES->DOUTR; + } + Stm32SaesClearCCF(); + XMEMCPY(out + i * WC_AES_BLOCK_SIZE, buf, WC_AES_BLOCK_SIZE); } -#ifdef STM32_HAL_V2 - hcryp->Init.DataWidthUnit = CRYP_DATAWIDTHUNIT_BYTE; - #if defined(CRYP_HEADERWIDTHUNIT_BYTE) && defined(STM_CRYPT_HEADER_WIDTH) - hcryp->Init.HeaderWidthUnit = - (STM_CRYPT_HEADER_WIDTH == 4) ? - CRYP_HEADERWIDTHUNIT_WORD : - CRYP_HEADERWIDTHUNIT_BYTE; - #endif + + SAES->CR &= ~AES_CR_EN; + + /* CBC: save IV for next call (last ciphertext block). 
*/ + if (chmod == STM32_AES_CHMOD_CBC) { + if (isEnc) { + XMEMCPY(aes->reg, out + sz - WC_AES_BLOCK_SIZE, + WC_AES_BLOCK_SIZE); + } + else { + XMEMCPY(aes->reg, prevCt, WC_AES_BLOCK_SIZE); + } + } + + ret = 0; + +exit: + /* Scrub the in-flight wrapped-key buffer and the SAES key/IV + * state. After DhukOp the unwrapped key would otherwise be + * resident in KEYR until the next operation overwrote it; on a + * platform where a privileged or debug reader can sample the + * register file, that would defeat the DHUK threat model. Force + * a hardware reset of the IP via IPRST when the CMSIS exposes + * it (newer SAES variants); always disable EN and zero our local + * staging buffer. */ + SAES->CR &= ~AES_CR_EN; +#ifdef AES_CR_IPRST + SAES->CR |= AES_CR_IPRST; + __DSB(); + SAES->CR &= ~AES_CR_IPRST; #endif + /* CCF clear after IP reset; harmless if IPRST already cleared CCF. */ + Stm32SaesClearCCF(); + ForceZero(wrappedKey, sizeof(wrappedKey)); + ForceZero(prevCt, sizeof(prevCt)); + wolfSSL_CryptHwMutexUnLock(); + return ret; +} +#endif /* WOLFSSL_STM32_DHUK_UNWRAP */ + +/* Back-compat ECB-only wrapper. */ +int wc_Stm32_Aes_DhukOp(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc) +{ + return wc_Stm32_Aes_DhukOp_ex(aes, out, in, sz, isEnc, 0 /* isCbc */); +} +#if defined(WC_STM32_HAS_DHUK) + +#ifdef WOLF_CRYPTO_CB +/* ---- STM32 DHUK SAES backend (driven by the crypto-callback device below) -- + * Derive-from-seed model: a 256-bit seed is mixed with the silicon DHUK so a + * device-bound working key lands in SAES KEYR; the key never enters SW. The + * derive and the symmetric op run together under one crypto-mutex hold so KEYR + * stays valid between them. + * + * Validated on STM32U385 (TZEN=0): GMAC is deterministic and round-trip + * verifies. 
The key-derivation/decrypt passes complete via SR.BUSY clearing + * plus SR.KEYVALID, NOT via CCF (CCF is only raised for data-output passes); + * waiting on CCF for the key path is what previously caused WC_TIMEOUT_E. */ + +/* AES modes for Stm32Dhuk_Aes (was in the removed dhuk.h). */ +#define WC_DHUK_MODE_ECB 0 +#define WC_DHUK_MODE_CBC 1 + +/* The per-key 256-bit seed is NOT held in a shared static -- each operation + * reads it directly from its own Aes/ecc_key object and derives the working + * key under the HW crypto mutex (see Stm32SaesDeriveKeyFromSeed, which copies + * the seed into a ForceZero'd local). This avoids a cross-thread race where a + * seed staged outside the mutex could be overwritten before it was consumed. + * DHUK operations are serialized by wolfSSL_CryptHwMutexLock(). */ + +static int Stm32Dhuk_Init(void* beCtx) +{ + (void)beCtx; return 0; } -void wc_Stm32_Aes_Cleanup(void) +static void Stm32Dhuk_Cleanup(void* beCtx) { -#ifdef STM32_HW_CLOCK_AUTO - /* disable the peripheral clock */ - __HAL_RCC_CRYP_CLK_DISABLE(); -#endif + (void)beCtx; } -#else /* Standard Peripheral Library */ -int wc_Stm32_Aes_Init(Aes* aes, CRYP_InitTypeDef* cryptInit, - CRYP_KeyInitTypeDef* keyInit) +/* Derive a DHUK-bound working key into SAES KEYR from a 256-bit seed. + * Caller must already hold the crypto mutex and have completed SAES init. + * (1) KMOD=WRAPPED, KEYSEL=HW(DHUK), MODE=KEYDERIVATION; enable. + * (2) MODE=DECRYPT; re-enable EN (auto-cleared after pass 1); push the seed. + * Completion of the key-path passes is signalled by SR.BUSY clearing plus + * SR.KEYVALID, NOT by CCF (CCF is only raised for data-output passes). 
*/ +static int Stm32SaesDeriveKeyFromSeed(const byte* seed, word32 seedSz) { - int ret; + word32 seedWords[8]; + word32 i; + word32 cr; + word32 spin; + int ret = 0; + + if (seed == NULL || seedSz != 32u) { + return BAD_FUNC_ARG; + } + XMEMCPY(seedWords, seed, 32); + + Stm32SaesClearCCF(); + cr = AES_CR_DATATYPE_1 | AES_CR_KEYSIZE | AES_CR_KMOD_0 | + AES_CR_KEYSEL_0 | AES_CR_MODE_0; /* MODE = KEYDERIVATION */ + SAES->CR = cr; + SAES->CR |= AES_CR_EN; + spin = 0u; + __DMB(); + while ((SAES->SR & AES_SR_BUSY) != 0u) { + if (++spin >= (word32)STM32_BARE_SAES_TIMEOUT) { + ret = WC_TIMEOUT_E; + goto done; + } + } + Stm32SaesClearCCF(); + + cr = (SAES->CR & ~AES_CR_MODE) | AES_CR_MODE_1; /* MODE = DECRYPT */ + SAES->CR = cr; + SAES->CR |= AES_CR_EN; /* re-enable (auto-cleared) */ + for (i = 0; i < 8u; i += 4u) { + SAES->DINR = seedWords[i + 0u]; + SAES->DINR = seedWords[i + 1u]; + SAES->DINR = seedWords[i + 2u]; + SAES->DINR = seedWords[i + 3u]; + spin = 0u; + __DMB(); + while ((SAES->SR & AES_SR_BUSY) != 0u) { + if (++spin >= (word32)STM32_BARE_SAES_TIMEOUT) { + ret = WC_TIMEOUT_E; + goto done; + } + } + Stm32SaesClearCCF(); + } + if ((SAES->SR & AES_SR_KEYVALID) == 0u) { + ret = WC_HW_E; + goto done; + } + SAES->CR &= ~AES_CR_EN; + +done: + ForceZero(seedWords, sizeof(seedWords)); + return ret; +} + +/* GMAC tag using a key derived from the staged seed via the silicon DHUK. */ +static int Stm32Dhuk_Gmac(const byte* seed, const byte* iv, word32 ivSz, + const byte* aad, word32 aadSz, byte* tag, word32 tagSz) +{ + /* The Gcm struct (with its GHASH table) is the large object here; move it + * off the BARE stack onto the heap under WOLFSSL_SMALL_STACK. 
*/ + Gcm* gcmp; +#ifndef WOLFSSL_SMALL_STACK + Gcm gcm_stack; +#endif + byte H[WC_AES_BLOCK_SIZE]; + byte J0[WC_AES_BLOCK_SIZE]; + byte Ek_J0[WC_AES_BLOCK_SIZE]; + byte Y[WC_AES_BLOCK_SIZE]; + word32 buf[4]; + word32 i; + word32 cr; + int saes_locked = 0; + int ret; + + if (seed == NULL || iv == NULL || tag == NULL) { + return BAD_FUNC_ARG; + } + if (ivSz == 0u) { + return BAD_FUNC_ARG; + } + if (tagSz < 4u || tagSz > WC_AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + if (aad == NULL && aadSz > 0u) { + return BAD_FUNC_ARG; + } + +#ifdef WOLFSSL_SMALL_STACK + gcmp = (Gcm*)XMALLOC(sizeof(*gcmp), NULL, DYNAMIC_TYPE_AES); + if (gcmp == NULL) { + return MEMORY_E; + } +#else + gcmp = &gcm_stack; +#endif + XMEMSET(gcmp, 0, sizeof(*gcmp)); + XMEMSET(H, 0, sizeof(H)); + XMEMSET(J0, 0, sizeof(J0)); + XMEMSET(Ek_J0, 0, sizeof(Ek_J0)); + XMEMSET(Y, 0, sizeof(Y)); + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + goto exit; + } + saes_locked = 1; + + Stm32SaesEnsureRng(); +#ifdef WC_STM32_SAES_CLK_ENABLE + WC_STM32_SAES_CLK_ENABLE(); +#endif + ret = Stm32SaesWaitInit(); + if (ret != 0) { + goto exit; + } + + /* Derive the DHUK-bound working key into SAES KEYR from the caller's seed. */ + ret = Stm32SaesDeriveKeyFromSeed(seed, 32u); + if (ret != 0) { + goto exit; + } + + /* ---- ECB-ENCRYPT with the derived key: H = AES_Ek(0), Ek_J0 = AES_Ek(J0); + * GHASH over AAD in SW; tag = GHASH XOR Ek_J0, truncated. ---- */ + cr = AES_CR_DATATYPE_1 | AES_CR_KEYSIZE; /* KMOD/KEYSEL=NORMAL, ECB */ + SAES->CR = cr; + SAES->CR |= AES_CR_EN; + + /* H = AES_Ek(0^128) */ + XMEMSET(buf, 0, sizeof(buf)); + ret = Stm32SaesEcbBlock(buf); + if (ret != 0) { + ForceZero(buf, sizeof(buf)); + goto exit; + } + XMEMCPY(H, buf, WC_AES_BLOCK_SIZE); + XMEMCPY(gcmp->H, buf, WC_AES_BLOCK_SIZE); +#if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT) + /* Table-based GHASH multiplies via gcm.M0, not gcm.H, so the table must + * be built from H before any GHASH call below. 
GCM_SMALL/GCM_WORD32 use + * gcm.H directly and do not define GenerateM0. */ + GenerateM0(gcmp); +#endif + ForceZero(buf, sizeof(buf)); + + /* J0: 12-byte IV fast path, else GHASH-J0 per NIST SP 800-38D. */ + if (ivSz == 12u) { + XMEMCPY(J0, iv, 12); + J0[12] = 0x00; + J0[13] = 0x00; + J0[14] = 0x00; + J0[15] = 0x01; + } + else { + GHASH(gcmp, NULL, 0, iv, ivSz, J0, WC_AES_BLOCK_SIZE); + } + + /* Ek_J0 = AES_Ek(J0) */ + XMEMCPY(buf, J0, WC_AES_BLOCK_SIZE); + ret = Stm32SaesEcbBlock(buf); + if (ret != 0) { + ForceZero(buf, sizeof(buf)); + goto exit; + } + XMEMCPY(Ek_J0, buf, WC_AES_BLOCK_SIZE); + ForceZero(buf, sizeof(buf)); + + SAES->CR &= ~AES_CR_EN; + + GHASH(gcmp, aad, aadSz, NULL, 0, Y, WC_AES_BLOCK_SIZE); + for (i = 0; i < WC_AES_BLOCK_SIZE; i++) { + Y[i] ^= Ek_J0[i]; + } + XMEMCPY(tag, Y, tagSz); + ret = 0; + +exit: + SAES->CR &= ~AES_CR_EN; +#ifdef AES_CR_IPRST + SAES->CR |= AES_CR_IPRST; + __DSB(); + SAES->CR &= ~AES_CR_IPRST; +#endif + Stm32SaesClearCCF(); + ForceZero(H, sizeof(H)); + ForceZero(J0, sizeof(J0)); + ForceZero(Ek_J0, sizeof(Ek_J0)); + ForceZero(Y, sizeof(Y)); + ForceZero(gcmp, sizeof(*gcmp)); + if (saes_locked) { + wolfSSL_CryptHwMutexUnLock(); + } +#ifdef WOLFSSL_SMALL_STACK + XFREE(gcmp, NULL, DYNAMIC_TYPE_AES); +#endif + return ret; +} + +/* AES ECB/CBC using a key derived from the staged seed via the silicon DHUK. + * mode = WC_DHUK_MODE_ECB / _CBC; enc != 0 to encrypt. For CBC, iv is the + * 16-byte chaining value. The derived key never enters software. 
*/ +static int Stm32Dhuk_Aes(const byte* seed, int mode, int enc, const byte* in, + word32 sz, byte* out, const byte* iv, word32 ivSz) +{ + word32 chmod; + word32 cr; + word32 i; + word32 blocks; + int saes_locked = 0; + int ret; + + if (seed == NULL || in == NULL || out == NULL) { + return BAD_FUNC_ARG; + } + if (sz == 0u || (sz % WC_AES_BLOCK_SIZE) != 0u) { + return BAD_FUNC_ARG; + } + if (mode == WC_DHUK_MODE_ECB) { + chmod = STM32_AES_CHMOD_ECB; + } + else if (mode == WC_DHUK_MODE_CBC) { + if (iv == NULL || ivSz != WC_AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + chmod = STM32_AES_CHMOD_CBC; + } + else { + return BAD_FUNC_ARG; /* CTR not supported on this path yet */ + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + saes_locked = 1; + + Stm32SaesEnsureRng(); +#ifdef WC_STM32_SAES_CLK_ENABLE + WC_STM32_SAES_CLK_ENABLE(); +#endif + ret = Stm32SaesWaitInit(); + if (ret != 0) { + goto exit; + } + + ret = Stm32SaesDeriveKeyFromSeed(seed, 32u); + if (ret != 0) { + goto exit; + } + + /* ECB/CBC with the derived key now in KEYR (KMOD=NORMAL, KEYSEL=NORMAL). + * Decrypt needs a KEYDERIVATION prep pass first (last-round-first key + * schedule); that normal-mode prep raises CCF -> wait CCF, not BUSY. */ + cr = AES_CR_DATATYPE_1 | AES_CR_KEYSIZE | chmod; + if (!enc) { + /* Normal-mode (KMOD=NORMAL) decrypt key-schedule prep: this IS a + * data/compute pass and raises CCF (unlike the wrapped-key DHUK derive, + * which signals via BUSY/KEYVALID). Waiting on BUSY here clears too + * early and yields an incomplete inverse schedule. 
*/ + SAES->CR = cr | AES_CR_MODE_0; /* MODE = KEYDERIVATION */ + SAES->CR |= AES_CR_EN; + ret = Stm32SaesWaitCCF(); + if (ret != 0) { + goto exit; + } + Stm32SaesClearCCF(); + SAES->CR &= ~AES_CR_EN; + cr |= AES_CR_MODE_1; /* MODE = DECRYPT */ + } + SAES->CR = cr; + + if (chmod == STM32_AES_CHMOD_CBC) { + word32 v[4]; + XMEMSET(v, 0, sizeof(v)); + XMEMCPY(v, iv, WC_AES_BLOCK_SIZE); + ByteReverseWords(v, v, 16); + SAES->IVR3 = v[0]; + SAES->IVR2 = v[1]; + SAES->IVR1 = v[2]; + SAES->IVR0 = v[3]; + ForceZero(v, sizeof(v)); + } + + SAES->CR |= AES_CR_EN; + blocks = sz / WC_AES_BLOCK_SIZE; + for (i = 0; i < blocks; i++) { + word32 buf[4]; + XMEMCPY(buf, in + i * WC_AES_BLOCK_SIZE, WC_AES_BLOCK_SIZE); + ret = Stm32SaesEcbBlock(buf); + if (ret != 0) { + ForceZero(buf, sizeof(buf)); + goto exit; + } + XMEMCPY(out + i * WC_AES_BLOCK_SIZE, buf, WC_AES_BLOCK_SIZE); + ForceZero(buf, sizeof(buf)); + } + SAES->CR &= ~AES_CR_EN; + ret = 0; + +exit: + SAES->CR &= ~AES_CR_EN; +#ifdef AES_CR_IPRST + SAES->CR |= AES_CR_IPRST; + __DSB(); + SAES->CR &= ~AES_CR_IPRST; +#endif + Stm32SaesClearCCF(); + if (saes_locked) { + wolfSSL_CryptHwMutexUnLock(); + } + return ret; +} + +#if defined(HAVE_ECC) && defined(WOLFSSL_STM32_PKA) +/* Forward declarations: these PKA curve-param converters are defined later in + * the WOLFSSL_STM32_PKA section of this file. STM32_MAX_ECC_SIZE comes from + * wolfssl/wolfcrypt/port/st/stm32.h. */ +static int stm32_get_from_hexstr(const char* hex, uint8_t* dst, int sz); +static int stm32_getabs_from_hexstr(const char* hex, uint8_t* dst, int sz, + uint32_t *abs_sign); +static int stm32_get_from_mp_int(uint8_t *dst, const mp_int *a, int sz); + +/* Scratch buffers for Stm32Dhuk_Sign, grouped so the whole set (~11 * + * STM32_MAX_ECC_SIZE bytes) can move off the scarce BARE stack onto the heap + * under WOLFSSL_SMALL_STACK. 
*/ +typedef struct Stm32DhukSignBufs { + uint8_t Keybin[STM32_MAX_ECC_SIZE]; + uint8_t Intbin[STM32_MAX_ECC_SIZE]; + uint8_t Rbin[STM32_MAX_ECC_SIZE]; + uint8_t Sbin[STM32_MAX_ECC_SIZE]; + uint8_t Hashbin[STM32_MAX_ECC_SIZE]; + uint8_t prime[STM32_MAX_ECC_SIZE]; + uint8_t coefA[STM32_MAX_ECC_SIZE]; +#ifdef WOLFSSL_STM32_PKA_V2 + uint8_t coefB[STM32_MAX_ECC_SIZE]; +#endif + uint8_t gen_x[STM32_MAX_ECC_SIZE]; + uint8_t gen_y[STM32_MAX_ECC_SIZE]; + uint8_t order[STM32_MAX_ECC_SIZE]; +} Stm32DhukSignBufs; + +/* ECDSA sign with a DHUK-protected private key. The staged seed derives an + * intermediate AES key inside SAES (key never in SW); that key AES-ECB-decrypts + * the wrapped private scalar (key->dhuk_wrapped_priv) into a short-lived + * buffer; HAL_PKA_ECDSASign runs; the scalar is ForceZero-scrubbed. Output is a + * DER-encoded signature (cryptocb EccSign contract). */ +static int Stm32Dhuk_Sign(void* beCtx, const struct ecc_key* keyIn, + const byte* hash, word32 hashLen, byte* sig, word32* sigLen, + struct WC_RNG* rng) +{ + ecc_key* key = (ecc_key*)keyIn; + PKA_ECDSASignInTypeDef pka_ecc; + PKA_ECDSASignOutTypeDef pka_ecc_out; + mp_int gen_k; + mp_int order_mp; + mp_int r; + mp_int s; + /* Scratch grouped into *b (heap under WOLFSSL_SMALL_STACK, stack + * otherwise); the names below alias into it so the body is unchanged. 
*/ + Stm32DhukSignBufs* b; +#ifndef WOLFSSL_SMALL_STACK + Stm32DhukSignBufs b_stack; +#endif + uint8_t *Keybin, *Intbin, *Rbin, *Sbin, *Hashbin, *prime, *coefA; +#ifdef WOLFSSL_STM32_PKA_V2 + uint8_t *coefB; +#endif + uint8_t *gen_x, *gen_y, *order; + uint32_t coefA_sign = 1; + word32 cr; + word32 i; + word32 blocks; + int size; + int status; + int saes_locked = 0; + + (void)beCtx; + XMEMSET(&pka_ecc, 0, sizeof(pka_ecc)); + XMEMSET(&pka_ecc_out, 0, sizeof(pka_ecc_out)); + + if (key == NULL || sig == NULL || sigLen == NULL || hash == NULL || + rng == NULL || key->dp == NULL) { + return ECC_BAD_ARG_E; + } + if (key->dhuk_seed_sz != 32u) { + return BAD_FUNC_ARG; + } + if (key->dhuk_wrapped_priv_len == 0u || + (key->dhuk_wrapped_priv_len % 16u) != 0u || + key->dhuk_wrapped_priv_len > (word32)STM32_MAX_ECC_SIZE) { + return BAD_FUNC_ARG; + } + size = wc_ecc_size(key); + if ((int)key->dhuk_plain_priv_len != size) { + return BAD_FUNC_ARG; + } + + /* Early validation done -- allocate the scratch and alias the names. From + * here on every return goes through the 'cleanup' label so *b is freed. */ +#ifdef WOLFSSL_SMALL_STACK + b = (Stm32DhukSignBufs*)XMALLOC(sizeof(*b), key->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (b == NULL) { + return MEMORY_E; + } +#else + b = &b_stack; +#endif + Keybin = b->Keybin; Intbin = b->Intbin; Rbin = b->Rbin; Sbin = b->Sbin; + Hashbin = b->Hashbin; prime = b->prime; coefA = b->coefA; +#ifdef WOLFSSL_STM32_PKA_V2 + coefB = b->coefB; +#endif + gen_x = b->gen_x; gen_y = b->gen_y; order = b->order; + XMEMSET(Keybin, 0, STM32_MAX_ECC_SIZE); + XMEMSET(Intbin, 0, STM32_MAX_ECC_SIZE); + + /* Curve parameters for PKA. 
*/ + status = stm32_get_from_hexstr(key->dp->prime, prime, size); + if (status == MP_OKAY) + status = stm32_get_from_hexstr(key->dp->order, order, size); + if (status == MP_OKAY) + status = stm32_get_from_hexstr(key->dp->Gx, gen_x, size); + if (status == MP_OKAY) + status = stm32_get_from_hexstr(key->dp->Gy, gen_y, size); + if (status == MP_OKAY) + status = stm32_getabs_from_hexstr(key->dp->Af, coefA, size, + &coefA_sign); +#ifdef WOLFSSL_STM32_PKA_V2 + if (status == MP_OKAY) + status = stm32_get_from_hexstr(key->dp->Bf, coefB, size); +#endif + if (status != MP_OKAY) + goto cleanup; + + /* Random per-sign "k". */ + mp_init(&gen_k); + mp_init(&order_mp); + status = mp_read_unsigned_bin(&order_mp, order, size); + if (status == MP_OKAY) + status = wc_ecc_gen_k(rng, size, &gen_k, &order_mp); + if (status == MP_OKAY) + status = stm32_get_from_mp_int(Intbin, &gen_k, size); + mp_clear(&gen_k); + mp_clear(&order_mp); + if (status != MP_OKAY) { + goto cleanup; + } + + /* ---- SAES: derive intermediate key from the seed, then ECB-DECRYPT the + * wrapped scalar into Keybin. ---- */ + status = wolfSSL_CryptHwMutexLock(); + if (status != 0) { + goto cleanup; + } + saes_locked = 1; + + Stm32SaesEnsureRng(); +#ifdef WC_STM32_SAES_CLK_ENABLE + WC_STM32_SAES_CLK_ENABLE(); +#endif + status = Stm32SaesWaitInit(); + if (status != 0) { + goto saes_exit; + } + + status = Stm32SaesDeriveKeyFromSeed(key->dhuk_seed, key->dhuk_seed_sz); + if (status != 0) { + goto saes_exit; + } + + /* ECB-DECRYPT the wrapped scalar with the derived key in KEYR. The + * KEYDERIVATION prep and the data blocks both complete via CCF. */ + cr = AES_CR_DATATYPE_1 | AES_CR_KEYSIZE; + /* Normal-mode decrypt key-schedule prep raises CCF (see Stm32Dhuk_Aes note). 
*/ + SAES->CR = cr | AES_CR_MODE_0; /* MODE = KEYDERIVATION */ + SAES->CR |= AES_CR_EN; + status = Stm32SaesWaitCCF(); + if (status != 0) { + goto saes_exit; + } + Stm32SaesClearCCF(); + SAES->CR &= ~AES_CR_EN; + cr |= AES_CR_MODE_1; /* MODE = DECRYPT */ + SAES->CR = cr; + SAES->CR |= AES_CR_EN; + + blocks = key->dhuk_wrapped_priv_len / 16u; + for (i = 0; i < blocks; i++) { + word32 buf[4]; + word32 j; + XMEMCPY(buf, key->dhuk_wrapped_priv + i * 16u, 16u); + for (j = 0; j < 4u; j++) { + SAES->DINR = buf[j]; + } + status = Stm32SaesWaitCCF(); + if (status != 0) { + ForceZero(buf, sizeof(buf)); + goto saes_exit; + } + for (j = 0; j < 4u; j++) { + buf[j] = SAES->DOUTR; + } + Stm32SaesClearCCF(); + XMEMCPY(Keybin + i * 16u, buf, 16u); + ForceZero(buf, sizeof(buf)); + } + SAES->CR &= ~AES_CR_EN; + status = 0; + +saes_exit: + SAES->CR &= ~AES_CR_EN; +#ifdef AES_CR_IPRST + SAES->CR |= AES_CR_IPRST; + __DSB(); + SAES->CR &= ~AES_CR_IPRST; +#endif + Stm32SaesClearCCF(); + if (saes_locked) { + wolfSSL_CryptHwMutexUnLock(); + } + if (status != 0) { + status = (status > 0) ? WC_HW_E : status; + goto cleanup; + } + + /* ---- PKA ECDSA sign with the recovered scalar. 
---- */ + pka_ecc.primeOrderSize = size; + pka_ecc.modulusSize = size; + pka_ecc.coefSign = coefA_sign; + pka_ecc.coef = coefA; +#ifdef WOLFSSL_STM32_PKA_V2 + pka_ecc.coefB = coefB; +#endif + pka_ecc.modulus = prime; + pka_ecc.basePointX = gen_x; + pka_ecc.basePointY = gen_y; + pka_ecc.primeOrder = order; + + XMEMSET(Hashbin, 0, STM32_MAX_ECC_SIZE); + if (hashLen > STM32_MAX_ECC_SIZE) { + status = ECC_BAD_ARG_E; + goto cleanup; + } + else if ((int)hashLen > size) { + XMEMCPY(Hashbin, hash, size); + } + else { + XMEMCPY(Hashbin + (size - hashLen), hash, hashLen); + } + pka_ecc.hash = Hashbin; + pka_ecc.integer = Intbin; + pka_ecc.privateKey = Keybin; + pka_ecc_out.RSign = Rbin; + pka_ecc_out.SSign = Sbin; + + status = HAL_PKA_ECDSASign(&hpka, &pka_ecc, HAL_MAX_DELAY); + if (status != HAL_OK) { + HAL_PKA_RAMReset(&hpka); + status = WC_HW_E; + goto cleanup; + } + HAL_PKA_ECDSASign_GetResult(&hpka, &pka_ecc_out, NULL); + HAL_PKA_RAMReset(&hpka); + + /* DER-encode (r, s) into the caller's signature buffer. */ + mp_init(&r); + mp_init(&s); + status = mp_read_unsigned_bin(&r, Rbin, size); + if (status == MP_OKAY) + status = mp_read_unsigned_bin(&s, Sbin, size); + if (status == MP_OKAY) + status = StoreECC_DSA_Sig(sig, sigLen, &r, &s); + mp_clear(&r); + mp_clear(&s); + +cleanup: + /* Scrub the recovered scalar (Keybin) and the random k (Intbin). */ + ForceZero(Keybin, STM32_MAX_ECC_SIZE); + ForceZero(Intbin, STM32_MAX_ECC_SIZE); +#ifdef WOLFSSL_SMALL_STACK + XFREE(b, key->heap, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return status; +} +#endif /* HAVE_ECC && WOLFSSL_STM32_PKA */ + +/* ---- STM32 DHUK crypto callback ------------------------------------------- + * Flat WOLF_CRYPTO_CB device callback (the established wolfSSL vendor pattern). + * Enable by setting an object's devId to the registered device at init; supply + * the 256-bit derivation seed as the normal AES key (wc_AesGcmSetKey/SetKey -> + * aes->devKey) or, for ECC, via wc_ecc_import_wrapped_private(). 
The seed never + * yields a software key: SAES derives the device-bound working key internally + * from (seed, silicon DHUK). */ + +#ifndef NO_AES + +/* Return the 256-bit seed an Aes carries in devKey (set via the normal key + * API), or NULL if not a 256-bit seed key. The pointer is valid for the life + * of the Aes object; the consume path copies it under the HW mutex. */ +static const byte* Stm32Dhuk_AesSeed(Aes* aes) +{ + if (aes == NULL || aes->keylen != 32) { + return NULL; + } + return (const byte*)aes->devKey; +} + +/* Route AES-ECB and GMAC requests to the SAES backend; AES-CBC is rejected. */ +static int Stm32Dhuk_Cipher(struct wc_CryptoInfo* info) +{ + const byte* seed; + int ret; + + switch (info->cipher.type) { +#if defined(HAVE_AES_ECB) || defined(WOLFSSL_AES_DIRECT) || \ + defined(WOLF_CRYPTO_CB_ONLY_AES) + case WC_CIPHER_AES_ECB: + seed = Stm32Dhuk_AesSeed(info->cipher.aesecb.aes); + if (seed == NULL) { + return CRYPTOCB_UNAVAILABLE; + } + return Stm32Dhuk_Aes(seed, WC_DHUK_MODE_ECB, info->cipher.enc, + info->cipher.aesecb.in, info->cipher.aesecb.sz, + info->cipher.aesecb.out, NULL, 0); +#endif +#if defined(HAVE_AES_CBC) + case WC_CIPHER_AES_CBC: + /* Transparent DHUK AES is ECB/GCM only. The STM32 BARE/CUBEMX + * wc_AesCbcEncrypt/Decrypt are the public CBC entry points and do not + * dispatch through the crypto callback, so this case is not reached in + * a real DHUK build; wc_AesCbcEncrypt rejects a DHUK devId directly. + * Fail loud here as defense-in-depth: returning CRYPTOCB_UNAVAILABLE + * would let the SW CBC fallback run with the seed (aes->key) as the AES + * key -- a non-device-bound, wrong-key result. */ + (void)ret; + return NOT_COMPILED_IN; +#endif +#ifdef HAVE_AESGCM + case WC_CIPHER_AES_GCM: + /* GMAC = AES-GCM with empty plaintext. Full GCM payload encryption is + * a follow-on (needs a CTR + GHASH path). 
For a DHUK key we must NOT + * fall back to SW GCM: the SW path would key off aes->key, which holds + * the derivation seed (not the SAES-derived device key), producing a + * non-device-bound result. Fail loudly instead of returning + * CRYPTOCB_UNAVAILABLE (which would trigger the SW fallback). */ + if (info->cipher.enc) { + if (info->cipher.aesgcm_enc.sz != 0) { + return NOT_COMPILED_IN; + } + seed = Stm32Dhuk_AesSeed(info->cipher.aesgcm_enc.aes); + if (seed == NULL) { + return CRYPTOCB_UNAVAILABLE; + } + return Stm32Dhuk_Gmac(seed, + info->cipher.aesgcm_enc.iv, + info->cipher.aesgcm_enc.ivSz, + info->cipher.aesgcm_enc.authIn, + info->cipher.aesgcm_enc.authInSz, + info->cipher.aesgcm_enc.authTag, + info->cipher.aesgcm_enc.authTagSz); + } + else { + byte tag[WC_AES_BLOCK_SIZE]; + word32 tagSz = info->cipher.aesgcm_dec.authTagSz; + /* See enc note: do not fall back to SW GCM for a DHUK key. */ + if (info->cipher.aesgcm_dec.sz != 0) { + return NOT_COMPILED_IN; + } + if (tagSz == 0 || tagSz > sizeof(tag)) { + return BAD_FUNC_ARG; + } + seed = Stm32Dhuk_AesSeed(info->cipher.aesgcm_dec.aes); + if (seed == NULL) { + return CRYPTOCB_UNAVAILABLE; + } + XMEMSET(tag, 0, sizeof(tag)); + ret = Stm32Dhuk_Gmac(seed, + info->cipher.aesgcm_dec.iv, + info->cipher.aesgcm_dec.ivSz, + info->cipher.aesgcm_dec.authIn, + info->cipher.aesgcm_dec.authInSz, + tag, tagSz); + if (ret != 0) { + ForceZero(tag, sizeof(tag)); + return ret; + } + /* Constant-time tag compare (0 == equal); ConstantCompare avoids a + * local re-implementation of the secret compare. */ + ret = ConstantCompare(tag, info->cipher.aesgcm_dec.authTag, + (int)tagSz); + ForceZero(tag, sizeof(tag)); + return (ret == 0) ? 0 : AES_GCM_AUTH_E; + } +#endif + default: + return CRYPTOCB_UNAVAILABLE; + } +} +#endif /* !NO_AES */ + +#if defined(HAVE_ECC) && defined(HAVE_ECC_SIGN) && defined(WOLFSSL_STM32_PKA) +/* Route an ECDSA sign request to the SAES/PKA backend. 
*/ +static int Stm32Dhuk_PkSign(struct wc_CryptoInfo* info) +{ + ecc_key* key = info->pk.eccsign.key; + + if (key == NULL) { + return CRYPTOCB_UNAVAILABLE; + } +#ifdef WOLFSSL_STM32_CCB + /* CCB-protected key: the scalar is unwrapped SAES->PKA in hardware and the + * signature returned as raw (r,s); encode it as the DER ECDSA-Sig output. */ + if (key->dhuk_is_ccb) { + byte r[MAX_ECC_BYTES]; + byte s[MAX_ECC_BYTES]; + word32 sz = (word32)wc_ecc_size(key); + int ret; + + ret = wc_Stm32_Ccb_EccSign(ECC_SECP256R1, key->ccb_iv, key->ccb_tag, + key->dhuk_wrapped_priv, + key->dhuk_wrapped_priv_len, + info->pk.eccsign.in, info->pk.eccsign.inlen, + r, s); + if (ret == 0) { + ret = wc_ecc_rs_raw_to_sig(r, sz, s, sz, + info->pk.eccsign.out, + info->pk.eccsign.outlen); + } + ForceZero(r, sizeof(r)); + ForceZero(s, sizeof(s)); + return ret; + } +#endif + if (key->dhuk_seed_sz != 32u) { + return CRYPTOCB_UNAVAILABLE; + } + /* Stm32Dhuk_Sign reads key->dhuk_seed directly under the HW mutex. */ + return Stm32Dhuk_Sign(NULL, key, + info->pk.eccsign.in, info->pk.eccsign.inlen, + info->pk.eccsign.out, info->pk.eccsign.outlen, + info->pk.eccsign.rng); +} +#endif /* HAVE_ECC && HAVE_ECC_SIGN && WOLFSSL_STM32_PKA */ + +/* The crypto-callback device entry point (registered by wc_Stm32_DhukRegister). + * Returns CRYPTOCB_UNAVAILABLE for anything it does not handle so the caller + * falls back to software. 
*/ +static int Stm32_CryptoDevCb(int devId, struct wc_CryptoInfo* info, void* ctx) +{ + (void)devId; + (void)ctx; + if (info == NULL) { + return CRYPTOCB_UNAVAILABLE; + } + + switch (info->algo_type) { +#ifndef NO_AES + case WC_ALGO_TYPE_CIPHER: + return Stm32Dhuk_Cipher(info); +#endif +#if defined(HAVE_ECC) && defined(HAVE_ECC_SIGN) && defined(WOLFSSL_STM32_PKA) + case WC_ALGO_TYPE_PK: + if (info->pk.type == WC_PK_TYPE_ECDSA_SIGN) { + return Stm32Dhuk_PkSign(info); + } +#ifdef WOLFSSL_STM32_CCB + /* Transparent provisioning: wc_ecc_make_key() on a WC_DHUK_DEVID + * key binds a fresh CCB-protected blob to it (no CCB-specific API). */ + if (info->pk.type == WC_PK_TYPE_EC_KEYGEN) { + return wc_ecc_dev_make_key(info->pk.eckg.rng, + info->pk.eckg.size, info->pk.eckg.key, + info->pk.eckg.curveId); + } +#endif + return CRYPTOCB_UNAVAILABLE; +#endif + default: + return CRYPTOCB_UNAVAILABLE; + } +} + +/* Register the STM32 DHUK device at devId (e.g. WC_DHUK_DEVID). After this, + * objects whose devId is set to it at init route transparently to SAES. */ +int wc_Stm32_DhukRegister(int devId) +{ + int ret = Stm32Dhuk_Init(NULL); + if (ret != 0) { + return ret; + } + return wc_CryptoCb_RegisterDevice(devId, Stm32_CryptoDevCb, NULL); +} + +void wc_Stm32_DhukUnRegister(int devId) +{ + wc_CryptoCb_UnRegisterDevice(devId); + Stm32Dhuk_Cleanup(NULL); +} +#endif /* WOLF_CRYPTO_CB */ + +#ifdef WOLFSSL_STM32_CCB +/* --------------------------------------------------------------------------- + * CCB (Coupling and Chaining Bridge) -- STM32U3 (e.g. U385, RM0487 ch 31) and + * STM32C5 (e.g. C5A3, RM0522). The CCB chains PKA <-> SAES <-> RNG over a local + * interconnect so a DHUK-protected private key is used by the PKA without ever + * entering software or crossing the system bus. + * ------------------------------------------------------------------------- */ + +/* Bound for CCB BUSY / OPSTEP polling (loop iterations). 
*/ +#ifndef WC_STM32_CCB_TIMEOUT + #define WC_STM32_CCB_TIMEOUT 1000000u +#endif + +/* PKA RAM as 32-bit words. STM32C5 types PKA->RAM as uint8_t[] -- byte access + * bus-faults, word access is required; U3 types it as uint32_t[]. Cast to a + * 32-bit word pointer so the slot indices below address the RAM correctly on + * both families (mirrors wc_stm32_pka_prep_ram in the standalone PKA path). */ +#define WC_CCB_PKA_RAMW ((volatile uint32_t*)(void*)PKA->RAM) + + +/* Initialize the CCB peripheral per RM0487 31.5.1: enable the RNG / PKA / SAES + * / CCB clocks, pulse CCB_CR.IPRST and wait for CCB_SR.BUSY to clear, then + * confirm no operation error (CCB_SR.OPERR) latched. GTZC is left at its reset + * configuration -- ST's CCB MspInit configures no GTZC and runs from the + * non-secure alias, so the TZEN=0 build uses the same. + * Returns 0 on success, WC_TIMEOUT_E if CCB_SR.BUSY is still set after + * WC_STM32_CCB_TIMEOUT polls, or WC_HW_E if OPERR is latched after the reset. + * This function does not take the crypto HW mutex itself. */ +static int Stm32Ccb_Init(void) +{ + word32 t; + word32 operr; + + /* Clocks for every peer the CCB chains (RM 31.5.1 steps 2-5). */ +#ifdef WC_STM32_PKA_CLK_ENABLE + WC_STM32_PKA_CLK_ENABLE(); +#endif +#ifdef WC_STM32_SAES_CLK_ENABLE + WC_STM32_SAES_CLK_ENABLE(); +#endif +#ifdef WC_STM32_RNG_CLK_ENABLE + WC_STM32_RNG_CLK_ENABLE(); +#endif +#ifdef WC_STM32_CCB_CLK_ENABLE + WC_STM32_CCB_CLK_ENABLE(); +#endif +#ifdef WC_STM32_CCB_RST_PKA + /* Reset PKA / SAES / RNG so the CCB starts from a clean peripheral state. + * Prior standalone use of an engine -- wc_InitRng seeding the RNG, ECC + * keygen using the PKA -- can leave it in a mode that stalls the CCB's + * chained SAES GCM step (CCF never asserts, the create phase times out). + * A prior CCB op masks the problem by leaving the engines CCB-configured, + * so without this reset the very first CCB op after other crypto fails. + * Register names are family-abstracted (WC_STM32_CCB_RSTR/RST_*). 
*/ + RCC->WC_STM32_CCB_RSTR |= (WC_STM32_CCB_RST_PKA | WC_STM32_CCB_RST_SAES | + WC_STM32_CCB_RST_RNG); + __DSB(); + RCC->WC_STM32_CCB_RSTR &= ~(WC_STM32_CCB_RST_PKA | WC_STM32_CCB_RST_SAES | + WC_STM32_CCB_RST_RNG); + __DSB(); +#endif +#ifdef RCC_CR_SHSION + /* The SAES kernel clock is the SHSI (secure HSI); the CCB drives the SAES + * to unwrap the DHUK blob, so SHSI must be running or the SAES never + * computes -- CCF stalls and the GCM steps time out. The SAESSEL mux + * defaults to SHSI, so just enable it and wait for ready (ST does this in + * HAL_CRYP_MspInit). Without this the CCB only works if some prior SAES op + * happened to turn SHSI on. + * NOTE: the SHSIRDY wait is best-effort -- on timeout the loop only breaks + * and init carries on, so a SHSI that never becomes ready is not reported + * here and would only show up as a later CCF/OPSTEP timeout. */ + if ((RCC->CR & RCC_CR_SHSION) == 0U) { + t = 0; + RCC->CR |= RCC_CR_SHSION; + while ((RCC->CR & RCC_CR_SHSIRDY) == 0U) { + if (++t >= WC_STM32_CCB_TIMEOUT) { + break; + } + } + __DMB(); + } +#endif + + /* Reset the CCB: set IPRST, wait while BUSY, then clear IPRST (IPRST is + * also cleared on the timeout path before returning). */ + CCB->CR |= CCB_CR_IPRST; + __DSB(); + t = 0; + while ((CCB->SR & WC_STM32_CCB_SR_BUSY) != 0u) { + if (++t >= WC_STM32_CCB_TIMEOUT) { + CCB->CR &= ~CCB_CR_IPRST; + return WC_TIMEOUT_E; + } + } + CCB->CR &= ~CCB_CR_IPRST; + + /* Nothing is running yet, so no operation error should be latched. */ + operr = (CCB->SR & CCB_SR_OPERR) >> CCB_SR_OPERR_Pos; + if (operr != 0u) { + return WC_HW_E; + } + return 0; +} + +/* Public M0 entry: bring up the CCB and report whether it is usable. Runs + * Stm32Ccb_Init() under the crypto HW mutex. Returns 0 + * on success, WC_TIMEOUT_E if BUSY never clears, WC_HW_E if OPERR latched, or + * the wolfSSL_CryptHwMutexLock() error if the mutex cannot be taken. */ +int wc_Stm32_CcbInit(void) +{ + int ret; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + ret = Stm32Ccb_Init(); + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +/* ---- Bare-metal CCB ECDSA OPSTEP driver (ported from RM0487 ch31 / ST + * HAL_CCB). The CCB couples PKA RAM writes to the SAES (GCM) so the wrapped + * scalar is decrypted SAES->PKA over the local bus and never enters software. 
+ * Reuses the bare PKA helpers (wc_stm32_pka_load_param_be / _read_be) and the + * SAES CCF macros. Currently P-256 (ECC_SECP256R1). ---- */ + +/* CCB operation / PKA-mode / magic constants (RM0487, ST hal_ccb). */ +#define WC_CCB_OP_SIGN_USE 0x000000C3u /* CCOP: ECDSA blob-use sign */ +#define WC_CCB_OP_CREATE 0x000000C0u /* CCOP: ECDSA CPU blob create */ +#define WC_CCB_OP_SCALAR_USE 0x00000081u /* CCOP: scalar mul (pubkey) */ +#define WC_CCB_PKA_SIGN_MODE 0x24u /* PKA_CR.MODE for ECDSA sign */ +#define WC_CCB_PKA_MUL_MODE 0x20u /* PKA_CR.MODE for ECC scalar mul */ +#define WC_CCB_MAGIC 0x0CCBu /* SAES->PKA chaining magic */ +#define WC_CCB_FAKE 0x0001u /* placeholder fed to RNG->PKA */ +#define WC_CCB_PKA_OK 0x0000D60Du /* PKA_ECDSA_SIGN_OUT_ERROR ok */ + +/* P-256 CCB operand sizing, both counted in 32-bit words. + * opsz = PKA operand words = 2*(ceil(32/8)+1) = 10; + * cipsz = SAES ciphertext length in words (8 words = two 128-bit GCM blocks; + * the drivers feed/read cipsz words and pass cipsz*32 as the bit length) = + * opsz minus the 2-word PKA pad when + * opsz is not a multiple of 4 = 8. (Single-curve P-256 today.) */ +#define WC_CCB_P256_OPSZ 10u +#define WC_CCB_P256_CIPSZ 8u +/* SAES GCM final-phase header length word for the blob (bit-length encoding): + * six operands of opsz words plus three 64-bit scalars, matching what + * Stm32Ccb_LoadCurve() writes. NOTE(review): the reason for the trailing *2 is + * not stated here -- confirm against the reference manual. */ +#define WC_CCB_GCM_HDR_LEN(opsz) (((((opsz) * 32u) * 6u) + (3u * 64u)) * 2u) + +/* NIST P-256 parameters (big-endian, 32 bytes). 
*/ +static const byte wc_ccb_p256_aAbs[32] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x03}; +static const byte wc_ccb_p256_b[32] = { + 0x5a,0xc6,0x35,0xd8,0xaa,0x3a,0x93,0xe7,0xb3,0xeb,0xbd,0x55,0x76,0x98,0x86,0xbc, + 0x65,0x1d,0x06,0xb0,0xcc,0x53,0xb0,0xf6,0x3b,0xce,0x3c,0x3e,0x27,0xd2,0x60,0x4b}; +static const byte wc_ccb_p256_p[32] = { + 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff}; +static const byte wc_ccb_p256_n[32] = { + 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xbc,0xe6,0xfa,0xad,0xa7,0x17,0x9e,0x84,0xf3,0xb9,0xca,0xc2,0xfc,0x63,0x25,0x51}; +static const byte wc_ccb_p256_Gx[32] = { + 0x6b,0x17,0xd1,0xf2,0xe1,0x2c,0x42,0x47,0xf8,0xbc,0xe6,0xe5,0x63,0xa4,0x40,0xf2, + 0x77,0x03,0x7d,0x81,0x2d,0xeb,0x33,0xa0,0xf4,0xa1,0x39,0x45,0xd8,0x98,0xc2,0x96}; +static const byte wc_ccb_p256_Gy[32] = { + 0x4f,0xe3,0x42,0xe2,0xfe,0x1a,0x7f,0x9b,0x8e,0xe7,0xeb,0x4a,0x7c,0x0f,0x9e,0x16, + 0x2b,0xce,0x33,0x57,0x6b,0x31,0x5e,0xce,0xcb,0xb6,0x40,0x68,0x37,0xbf,0x51,0xf5}; + +/* Wait until every bit of `step` is set in CCB_SR, polling at most + * WC_STM32_CCB_TIMEOUT times. Returns 0 when seen, WC_TIMEOUT_E otherwise. + * NOTE(review): the test is (CCB->SR & step) == step on the whole SR word, i.e. + * a bit-subset match rather than an equality test on the OPSTEP field, so a + * wait for 0x12 is also satisfied by 0x13 or 0x1A -- confirm this is the + * intended OPSTEP semantics. */ +static int Stm32Ccb_WaitOpStep(word32 step) +{ + word32 t = 0; + while ((CCB->SR & step) != step) { + if (++t >= WC_STM32_CCB_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + return 0; +} + +/* Wait for a PKA_SR flag, then clear it via PKA_CLRFR (matches ST). The same + * mask is written to PKA_CLRFR, so the flag and its clear bit are assumed to + * share a bit position. Returns 0, or WC_TIMEOUT_E (flag left uncleared) after + * WC_STM32_CCB_TIMEOUT polls. */ +static int Stm32Ccb_PkaWaitFlag(word32 flag) +{ + word32 t = 0; + while ((PKA->SR & flag) == 0u) { + if (++t >= WC_STM32_CCB_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + PKA->CLRFR = flag; + return 0; +} + +/* Wait for the SAES CCF (GCM step done), then clear it. 
*/ +static int Stm32Ccb_SaesWaitCcf(void) +{ + word32 t = 0; + while ((SAES->ISR & AES_ISR_CCF) == 0u) { + if (++t >= WC_STM32_CCB_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + STM32_AES_CLEAR_INST(SAES); + return 0; +} + +/* Initialize the RNG for CCB use. The U3 RNG only produces DRDY once a + * NIST-compliant config has been written under CONDRST (same as wc_GenerateSeed + * in random.c). Always (re)write it here so prior RNG use -- e.g. wc_InitRng + * leaving a different config -- cannot stall the CCB's RNG->PKA draws. + * When RNG_CAND_NIST_CR_VALUE is not available the RNG is only put through a + * conditioning reset and enabled. + * Returns 0 once DRDY is set, WC_TIMEOUT_E if CONDRST never reads back clear or + * DRDY never sets within WC_STM32_CCB_TIMEOUT polls, and WC_HW_E if a seed + * error (RNG_SR.SEIS) is flagged after enabling. */ +static int Stm32Ccb_RngInit(void) +{ + word32 t = 0; +#if defined(RNG_CAND_NIST_CR_VALUE) && defined(RNG_CR_CONDRST) + RNG->CR = (word32)RNG_CAND_NIST_CR_VALUE | (word32)RNG_CR_CONDRST; +#ifdef RNG_CAND_NIST_NSCR_VALUE + RNG->NSCR = (word32)RNG_CAND_NIST_NSCR_VALUE; +#endif +#ifdef RNG_CAND_NIST_HTCR_VALUE + RNG->HTCR[0] = (word32)RNG_CAND_NIST_HTCR_VALUE; +#endif + RNG->CR &= ~(word32)RNG_CR_CONDRST; /* clear CONDRST to latch the config */ + while ((RNG->CR & RNG_CR_CONDRST) != 0u) { + if (++t >= WC_STM32_CCB_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + RNG->CR |= RNG_CR_RNGEN; +#else + RNG->CR = RNG_CR_CONDRST; + RNG->CR = 0u; + while ((RNG->CR & RNG_CR_CONDRST) != 0u) { + if (++t >= WC_STM32_CCB_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + RNG->CR = RNG_CR_RNGEN; +#endif + if ((RNG->SR & RNG_SR_SEIS) != 0u) { + return WC_HW_E; + } + t = 0; + while ((RNG->SR & RNG_SR_DRDY) == 0u) { + if (++t >= WC_STM32_CCB_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + return 0; +} + +/* Enable PKA and set the given protected mode (no interrupts). 
*/ +static int Stm32Ccb_PkaInit(word32 mode) +{ + int ret; + PKA->CR = PKA_CR_EN; + ret = Stm32Ccb_PkaWaitFlag(PKA_SR_INITOK); + if (ret != 0) { + return ret; + } + PKA->CLRFR = PKA_CLRFR_PROCENDFC | PKA_CLRFR_RAMERRFC | + PKA_CLRFR_ADDRERRFC | PKA_CLRFR_OPERRFC; + PKA->CR = (PKA->CR & ~(PKA_CR_MODE | PKA_CR_PROCENDIE | PKA_CR_RAMERRIE | + PKA_CR_ADDRERRIE | PKA_CR_OPERRIE)) | + (mode << PKA_CR_MODE_Pos); + return 0; +} + +/* Optimal bit length of a big-endian operand (ST GetOptBitSize_u8): nbytes is + * the operand length in bytes (must be >= 1, nbytes - 1 is computed unsigned) + * and msb is its most significant byte. + * NOTE(review): assumes msb != 0 -- if __CLZ(0) is 32, a zero top byte would + * contribute 0 bits. The only callers pass the top byte of the P-256 n and p + * tables, which is 0xff. */ +static word32 Stm32Ccb_OptBits(word32 nbytes, byte msb) +{ + return ((nbytes - 1u) * 8u) + (32u - (word32)__CLZ((word32)msb)); +} + +/* Write a small scalar (value + zero word) into PKA RAM, coupled: wait CCF. + * Returns the Stm32Ccb_SaesWaitCcf() result. */ +static int Stm32Ccb_SetScalar(word32 slot, word32 val) +{ + WC_CCB_PKA_RAMW[slot] = val; + WC_CCB_PKA_RAMW[slot + 1u] = 0u; + return Stm32Ccb_SaesWaitCcf(); +} + +/* Write a big-endian param (sizeBytes a non-zero multiple of 8; every caller + * passes 32) into PKA RAM 64 bits at a + * time, waiting for the SAES CCF coupling after each pair and the terminator + * (per ST CCB_SetPram during the GCM header phase). The last 8 bytes of src go + * to the lowest slot words. Returns BAD_FUNC_ARG for a rejected sizeBytes, + * otherwise the first Stm32Ccb_SaesWaitCcf() error or 0. */ +static int Stm32Ccb_SetParam(word32 slot, const byte* src, word32 sizeBytes) +{ + word32 operand = 2u * (((sizeBytes + 7u) / 8u) + 1u); + word32 off; + const byte* p; + int ret; + + /* The big-endian 8-byte chunk loop walks src[] downward in 8-byte steps; + * a zero or non-multiple-of-8 sizeBytes would index below src[0] and + * mis-place the operand window. All callers pass a 32-byte field. 
*/ + if (sizeBytes == 0u || (sizeBytes % 8u) != 0u) { + return BAD_FUNC_ARG; + } + + for (off = 0u; off < (operand - 2u); off += 2u) { + p = &src[sizeBytes - ((off * 4u) + 1u)]; + WC_CCB_PKA_RAMW[slot + off] = (word32)p[0] | ((word32)p[-1] << 8) | + ((word32)p[-2] << 16) | ((word32)p[-3] << 24); + WC_CCB_PKA_RAMW[slot + off + 1u] = (word32)p[-4] | ((word32)p[-5] << 8) | + ((word32)p[-6] << 16) | ((word32)p[-7] << 24); + ret = Stm32Ccb_SaesWaitCcf(); + if (ret != 0) { + return ret; + } + } + WC_CCB_PKA_RAMW[slot + ((sizeBytes + 3u) / 4u)] = 0u; + WC_CCB_PKA_RAMW[slot + ((sizeBytes + 3u) / 4u) + 1u] = 0u; + return Stm32Ccb_SaesWaitCcf(); +} + +/* CCB teardown: pulse CCB_CR.IPRST and wait BUSY clear. Best-effort: a BUSY + * timeout is not reported and IPRST is cleared regardless. */ +static void Stm32Ccb_Reset(void) +{ + word32 t = 0; + CCB->CR |= CCB_CR_IPRST; + __DSB(); + while ((CCB->SR & WC_STM32_CCB_SR_BUSY) != 0u) { + if (++t >= WC_STM32_CCB_TIMEOUT) { + break; + } + } + CCB->CR &= ~CCB_CR_IPRST; +} + +/* Write 8 big-endian bytes ending at p (p[0]..p[-7]) as two little-endian PKA + * words at slot -- the 64-bit chunk primitive used for the clear scalar d load + * (Stm32Ccb_SetParam open-codes the same packing and adds a CCF wait). The + * caller must ensure p[-7] is still inside the source buffer. */ +static void Stm32Ccb_Wr64(word32 slot, const byte* p) +{ + WC_CCB_PKA_RAMW[slot] = (word32)p[0] | ((word32)p[-1] << 8) | + ((word32)p[-2] << 16) | ((word32)p[-3] << 24); + WC_CCB_PKA_RAMW[slot + 1u] = (word32)p[-4] | ((word32)p[-5] << 8) | + ((word32)p[-6] << 16) | ((word32)p[-7] << 24); +} + +/* Load the ECDSA curve params into PKA RAM, each coupled to the SAES GCM header + * (wait CCF). Shared by the bare blob-create, blob-use sign and public-key + * (scalar-mul) paths. Always loads the P-256 tables: three 2-word scalars + * followed by six 32-byte operands. Returns the first helper error or 0. 
*/ +static int Stm32Ccb_LoadCurve(void) +{ + int ret; + if ((ret = Stm32Ccb_SetScalar(PKA_ECDSA_SIGN_IN_ORDER_NB_BITS, + Stm32Ccb_OptBits(32u, wc_ccb_p256_n[0]))) != 0) { return ret; } + if ((ret = Stm32Ccb_SetScalar(PKA_ECDSA_SIGN_IN_MOD_NB_BITS, + Stm32Ccb_OptBits(32u, wc_ccb_p256_p[0]))) != 0) { return ret; } + if ((ret = Stm32Ccb_SetScalar(PKA_ECDSA_SIGN_IN_A_COEFF_SIGN, 1u)) != 0) + { return ret; } + if ((ret = Stm32Ccb_SetParam(PKA_ECDSA_SIGN_IN_A_COEFF, wc_ccb_p256_aAbs, + 32u)) != 0) { return ret; } + if ((ret = Stm32Ccb_SetParam(PKA_ECDSA_SIGN_IN_B_COEFF, wc_ccb_p256_b, + 32u)) != 0) { return ret; } + if ((ret = Stm32Ccb_SetParam(PKA_ECDSA_SIGN_IN_MOD_GF, wc_ccb_p256_p, + 32u)) != 0) { return ret; } + if ((ret = Stm32Ccb_SetParam(PKA_ECDSA_SIGN_IN_ORDER_N, wc_ccb_p256_n, + 32u)) != 0) { return ret; } + if ((ret = Stm32Ccb_SetParam(PKA_ECDSA_SIGN_IN_INITIAL_POINT_X, + wc_ccb_p256_Gx, 32u)) != 0) { return ret; } + return Stm32Ccb_SetParam(PKA_ECDSA_SIGN_IN_INITIAL_POINT_Y, + wc_ccb_p256_Gy, 32u); +} + +/* Wait until a SAES_SR flag reaches the wanted state (used for BUSY/KEYVALID): + * wantSet != 0 waits for any bit of flag to be set, wantSet == 0 waits for all + * of them to be clear. Returns 0, or WC_TIMEOUT_E after WC_STM32_CCB_TIMEOUT + * polls. */ +static int Stm32Ccb_SaesWaitSr(word32 flag, int wantSet) +{ + word32 t = 0; + while (((SAES->SR & flag) != 0u) != (wantSet != 0)) { + if (++t >= WC_STM32_CCB_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + return 0; +} + +/* Common prologue shared by the three CCB ECDSA operations (blob create, blob + * use sign, public-key scalar mult): reset/clock the CCB, select the operation + * (CCOP) and wait for OPSTEP 0x01, condition the RNG and PKA (mode = the PKA + * sub-operation), then wait for SAES to go idle so the blob key can be loaded. + * The caller holds the crypto HW mutex (it owns the matching unlock). 
*/ +static int Stm32Ccb_OpBegin(word32 ccop, word32 pkaMode) +{ + int ret = Stm32Ccb_Init(); + if (ret != 0) { + return ret; + } + CCB->CR = (CCB->CR & ~CCB_CR_CCOP) | ccop; + if ((ret = Stm32Ccb_WaitOpStep(0x01u)) != 0) { + return ret; + } + if ((ret = Stm32Ccb_RngInit()) != 0) { + return ret; + } + if ((ret = Stm32Ccb_PkaInit(pkaMode)) != 0) { + return ret; + } + return Stm32Ccb_SaesWaitSr(AES_SR_BUSY, 0); +} + +/* Load the DHUK blob key into SAES (KEYSEL=HW, 256-bit, GCM) and wait for the + * CCB to advance. isUse=1 selects decrypt (MODE_1) for blob use (sign / pubkey) + * and the CCB reaches OPSTEP 0x12; isUse=0 selects encrypt for blob creation and + * the CCB reaches OPSTEP 0x02. SAES_CR is overwritten as a whole (EN is not set + * here). Returns 0, or WC_TIMEOUT_E if KEYVALID or the expected OPSTEP is not + * observed. */ +static int Stm32Ccb_LoadBlobKey(int isUse) +{ + word32 cr = AES_CR_KEYSEL_0 | AES_CR_KEYSIZE | STM32_AES_CHMOD_GCM; + int ret; + + if (isUse) { + cr |= AES_CR_MODE_1; + } + SAES->CR = cr; + if ((ret = Stm32Ccb_SaesWaitSr(AES_SR_KEYVALID, 1)) != 0) { + return ret; + } + return Stm32Ccb_WaitOpStep(isUse ? 0x12u : 0x02u); +} + +/* CCB RNG draw wait: spin until RNG_SR.DRDY with a bounded timeout. Returns 0, + * or WC_TIMEOUT_E after WC_STM32_CCB_TIMEOUT polls. DRDY is only observed, not + * consumed, by this helper. */ +static int Stm32Ccb_RngWaitDrdy(void) +{ + word32 t = 0; + while ((RNG->SR & RNG_SR_DRDY) == 0u) { + if (++t >= WC_STM32_CCB_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + return 0; +} + +/* CCB SAES busy wait: spin until SAES_SR.BUSY clears with a bounded timeout. + * Same wait as Stm32Ccb_SaesWaitSr(AES_SR_BUSY, 0). Returns 0 or + * WC_TIMEOUT_E. */ +static int Stm32Ccb_SaesWaitBusy(void) +{ + word32 t = 0; + while ((SAES->SR & AES_SR_BUSY) != 0u) { + if (++t >= WC_STM32_CCB_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + return 0; +} + +/* Load the four blob IV words into SAES IVR0..IVR3 (blob-use ordering): v[0] + * goes to IVR0 through v[3] to IVR3, with no byte swapping. */ +static void Stm32Ccb_LoadIv(const word32* v) +{ + SAES->IVR0 = v[0]; + SAES->IVR1 = v[1]; + SAES->IVR2 = v[2]; + SAES->IVR3 = v[3]; +} + +/* Load the four reference-tag words into the CCB REFTAGR registers. 
*/ +static void Stm32Ccb_LoadRefTag(const word32* v) +{ + CCB->REFTAGR[0] = v[0]; + CCB->REFTAGR[1] = v[1]; + CCB->REFTAGR[2] = v[2]; + CCB->REFTAGR[3] = v[3]; +} + +/* Load the 32-byte hash into PKA RAM as big-endian -> little-endian words. + * Exactly 32 bytes of hash are read (the last four bytes form the first word); + * no terminator words are written and no CCF wait is performed. */ +static void Stm32Ccb_LoadHash(const byte* hash) +{ + word32 i; + for (i = 0u; i < 8u; i++) { + WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_IN_HASH_E + i] = + (word32)hash[(32u - (i * 4u)) - 1u] | + ((word32)hash[(32u - (i * 4u)) - 2u] << 8) | + ((word32)hash[(32u - (i * 4u)) - 3u] << 16) | + ((word32)hash[(32u - (i * 4u)) - 4u] << 24); + } +} + +/* Blob-use GCM final phase: feed the length block, wait CCF, and verify the + * integrity tag reads back all-zero (nonzero => blob tag mismatch). Shared by + * the scalar-mul (public-key) and ECDSA sign blob-use paths. + * opsz and cipsz are 32-bit word counts; the length block is written as four + * DINR words {0, WC_CCB_GCM_HDR_LEN(opsz), 0, cipsz * 32}. + * Returns 0 when all four DOUTR words are zero, WC_HW_E at the first nonzero + * word (the remaining DOUTR words are then left unread), or the + * Stm32Ccb_SaesWaitCcf() error. */ +static int Stm32Ccb_GcmFinalTagCheck(word32 opsz, word32 cipsz) +{ + word32 i; + int ret; + SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | WC_STM32_AES_CR_PHASE_0 | + WC_STM32_AES_CR_PHASE_1; + SAES->DINR = 0u; + SAES->DINR = WC_CCB_GCM_HDR_LEN(opsz); + SAES->DINR = 0u; + SAES->DINR = cipsz * 32u; + if ((ret = Stm32Ccb_SaesWaitCcf()) != 0) { + return ret; + } + for (i = 0u; i < 4u; i++) { + if (SAES->DOUTR != 0u) { /* nonzero => blob tag mismatch */ + return WC_HW_E; + } + } + return 0; +} + +/* Bare CCB public-key computation: scalar mult d*G via the blob. Mirrors the + * blob-use sign path, but loads the unwrapped scalar into the K slot (no random + * k) and reads the resulting point into pubX/pubY. 
*/ +static int Stm32Ccb_ComputePub(const byte* iv, const byte* tag, + const byte* wrapped, byte* pubX, byte* pubY) +{ + word32 ivw[4]; + word32 tagw[4]; + word32 wrapw[8]; + word32 opsz; + word32 cipsz; + word32 off; + word32 block; + word32 i; + int ret; + + XMEMCPY(ivw, iv, sizeof(ivw)); + XMEMCPY(tagw, tag, sizeof(tagw)); + XMEMCPY(wrapw, wrapped, sizeof(wrapw)); + opsz = WC_CCB_P256_OPSZ; + cipsz = WC_CCB_P256_CIPSZ; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + ret = Stm32Ccb_OpBegin(WC_CCB_OP_SCALAR_USE, WC_CCB_PKA_MUL_MODE); + if (ret != 0) { goto done; } + if ((ret = Stm32Ccb_LoadBlobKey(1 /* use */)) != 0) { goto done; } + + Stm32Ccb_LoadIv(ivw); + SAES->CR |= AES_CR_EN; + if ((ret = Stm32Ccb_SaesWaitCcf()) != 0) { goto done; } + Stm32Ccb_LoadRefTag(tagw); + /* SAES GCM/chaining-phase field: STM32C5 names it AES_CR_CPHASE, U3 names it + * AES_CR_GCMPH -- same bit positions/values. WC_STM32_AES_CR_PHASE abstracts + * the name (port/st/stm32.h). This was the one genuinely-divergent part of + * the OPSTEP driver; the rest is already family-neutral (WC_STM32_CCB_*). + * NOTE(review): Stm32Ccb_LoadCurve() below writes the curve through the + * PKA_ECDSA_SIGN_IN_* slot names while the PKA is in scalar-mul mode + * (presumably the slots coincide), and no PKA output-error word is checked + * before the point is read, whereas the sign path checks + * PKA_ECDSA_SIGN_OUT_ERROR -- confirm both are intended. The inputs are + * not NULL-checked here; iv/tag are read as 16 bytes, wrapped as 32 bytes, + * and pubX/pubY each receive 32 bytes. 
*/ + SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | WC_STM32_AES_CR_PHASE_0 | + AES_CR_EN; + if ((ret = Stm32Ccb_WaitOpStep(0x13u)) != 0) { goto done; } + + if ((ret = Stm32Ccb_LoadCurve()) != 0) { goto done; } + SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | WC_STM32_AES_CR_PHASE_1; + if ((ret = Stm32Ccb_WaitOpStep(0x14u)) != 0) { goto done; } + + block = 0u; + for (off = 0u; off < cipsz; off++) { + SAES->DINR = wrapw[cipsz - 1u - off]; + if ((off % 4u) == 3u) { + if ((ret = Stm32Ccb_SaesWaitCcf()) != 0) { goto done; } + for (i = 0u; i < 4u; i++) { + WC_CCB_PKA_RAMW[PKA_ECC_SCALAR_MUL_IN_K + block + i] = WC_CCB_MAGIC; + } + block += 4u; + } + } + WC_CCB_PKA_RAMW[PKA_ECC_SCALAR_MUL_IN_K + cipsz] = 0u; + WC_CCB_PKA_RAMW[PKA_ECC_SCALAR_MUL_IN_K + cipsz + 1u] = 0u; + if ((ret = Stm32Ccb_WaitOpStep(0x17u)) != 0) { goto done; } + + if ((ret = Stm32Ccb_GcmFinalTagCheck(opsz, cipsz)) != 0) { goto done; } + if ((ret = Stm32Ccb_WaitOpStep(0x18u)) != 0) { goto done; } + + PKA->CR |= PKA_CR_START; + if ((ret = Stm32Ccb_WaitOpStep(0x19u)) != 0) { goto done; } + if ((ret = Stm32Ccb_PkaWaitFlag(PKA_SR_PROCENDF)) != 0) { goto done; } + if ((ret = Stm32Ccb_WaitOpStep(0x1Au)) != 0) { goto done; } + + wc_stm32_pka_read_be(pubX, &WC_CCB_PKA_RAMW[PKA_ECC_SCALAR_MUL_OUT_RESULT_X], 32u); + wc_stm32_pka_read_be(pubY, &WC_CCB_PKA_RAMW[PKA_ECC_SCALAR_MUL_OUT_RESULT_Y], 32u); + ret = 0; + +done: + Stm32Ccb_Reset(); + SAES->CR &= ~AES_CR_EN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +/* Bare CCB ECDSA blob creation (P-256, CPU-supplied scalar). Produces the + * device-bound blob {iv, tag, wrapped d}; the clear scalar is fed to the SAES, + * encrypted under the DHUK, and read back encrypted -- it never persists in + * software here beyond the caller's input. The public key is derived by a + * separate compute step (left to the caller / wc_ecc layer). 
*/ +int wc_Stm32_Ccb_EccMakeBlob(int curveId, const byte* d, word32 dLen, + byte* iv, byte* tag, byte* wrapped, word32* wrappedSz, + byte* pubX, byte* pubY) +{ + word32 ivw[4]; + word32 tagw[4]; + word32 wrapw[8]; + word32 opsz; + word32 cipsz; + word32 off; + word32 block; + word32 i; + int ret; + + if (curveId != ECC_SECP256R1) { + return NOT_COMPILED_IN; + } + /* Require pubX/pubY non-NULL to match the CubeMX/HAL implementation's + * contract (one public API, one NULL-handling rule across build flavors). + * d must be exactly 32 bytes. *wrappedSz is output-only here: it is not + * read as a capacity, so iv/tag must hold 16 bytes and wrapped 32 bytes. */ + if (d == NULL || dLen != 32u || iv == NULL || tag == NULL || + wrapped == NULL || wrappedSz == NULL || pubX == NULL || pubY == NULL) { + return BAD_FUNC_ARG; + } + opsz = WC_CCB_P256_OPSZ; + cipsz = WC_CCB_P256_CIPSZ; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + /* CCOP = ECDSA CPU blob creation; load the DHUK blob key in encrypt mode. */ + ret = Stm32Ccb_OpBegin(WC_CCB_OP_CREATE, WC_CCB_PKA_SIGN_MODE); + if (ret != 0) { goto done; } + if ((ret = Stm32Ccb_LoadBlobKey(0 /* create */)) != 0) { goto done; } + + /* Blob-creation initial phase: IVR0=2, IVR1-3 randomised by CCB, read the + * generated IV back, GCM init, header phase. The CPU only writes the + * WC_CCB_FAKE placeholder to IVR1-3; the values kept in ivw[] are whatever + * the registers read back afterwards. */ + SAES->IVR0 = 0x00000002u; + if ((ret = Stm32Ccb_RngWaitDrdy()) != 0) { goto done; } + SAES->IVR1 = WC_CCB_FAKE; + if ((ret = Stm32Ccb_RngWaitDrdy()) != 0) { goto done; } + SAES->IVR2 = WC_CCB_FAKE; + if ((ret = Stm32Ccb_RngWaitDrdy()) != 0) { goto done; } + SAES->IVR3 = WC_CCB_FAKE; + if ((ret = Stm32Ccb_RngWaitDrdy()) != 0) { goto done; } + if ((PKA->SR & PKA_SR_RNGERRF) != 0u) { ret = WC_HW_E; goto done; } + ivw[3] = SAES->IVR3; + ivw[2] = SAES->IVR2; + ivw[1] = SAES->IVR1; + ivw[0] = SAES->IVR0; + SAES->CR |= AES_CR_EN; + if ((ret = Stm32Ccb_SaesWaitCcf()) != 0) { goto done; } + SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | WC_STM32_AES_CR_PHASE_0 | AES_CR_EN; + if ((ret = Stm32Ccb_WaitOpStep(0x03u)) != 0) { goto done; } + + /* Curve params (coupled), then GCM payload phase. 
*/ + if ((ret = Stm32Ccb_LoadCurve()) != 0) { goto done; } + SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | WC_STM32_AES_CR_PHASE_1; + if ((ret = Stm32Ccb_WaitOpStep(0x04u)) != 0) { goto done; } + + /* CPU writes the clear scalar d into PKA RAM (BE->LE words from the end), + * followed by two zero terminator words. */ + PKA->CLRFR = PKA_CLRFR_CMFC; + for (off = 0u; off < (opsz - 2u); off += 2u) { + Stm32Ccb_Wr64(PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D + off, + &d[32u - ((off * 4u) + 1u)]); + } + WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D + off] = 0u; + WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D + off + 1u] = 0u; + if ((ret = Stm32Ccb_PkaWaitFlag(PKA_SR_DATAOKF)) != 0) { goto done; } + if ((ret = Stm32Ccb_WaitOpStep(0x08u)) != 0) { goto done; } + + /* Read back the encrypted scalar from the SAES: writing the magic value to + * PKA RAM triggers the chaining, every 4th word yields a 128-bit block. + * The words are stored into wrapw[] from the top index downward. */ + PKA->CLRFR = PKA_CLRFR_CMFC; + block = 0u; + for (off = 0u; off < cipsz; off++) { + WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D + off] = WC_CCB_MAGIC; + if ((off % 4u) == 3u) { + if ((ret = Stm32Ccb_SaesWaitCcf()) != 0) { goto done; } + for (i = 0u; i < 4u; i++) { + wrapw[cipsz - (block + i + 1u)] = SAES->DOUTR; + } + block += 4u; + } + } + if ((ret = Stm32Ccb_SaesWaitBusy()) != 0) { goto done; } + WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D + cipsz] = 0u; + WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D + cipsz + 1u] = 0u; + if ((ret = Stm32Ccb_PkaWaitFlag(PKA_SR_DATAOKF)) != 0) { goto done; } + +#if defined(WOLFSSL_STM32C5) + /* STM32C5: blob-create is a combined create+SIGN -- the OPSTEP machine only + * advances through the GCM-final tag phase if the random k is drawn and the + * PKA sign is started. Draw k, run the GCM final phase, read the tag, then + * START the PKA; the resulting r,s are a creation by-product and discarded + * (the blob is still {iv, tag, wrapped}). Mirrors the C5 HAL + * CCB_ECDSA_SignBlobCreation. The U3 OPSTEP machine does not require this. + * NOTE(review): the k placeholders written here are WC_CCB_MAGIC, while the + * blob-use sign path writes WC_CCB_FAKE for the same RNG->PKA draw -- + * confirm the difference is intentional. 
*/ + if ((ret = Stm32Ccb_WaitOpStep(0x09u)) != 0) { goto done; } + for (off = 0u; off < (opsz - 2u); off++) { + if ((ret = Stm32Ccb_RngWaitDrdy()) != 0) { goto done; } + WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_IN_K + off] = WC_CCB_MAGIC; + } + if ((PKA->SR & PKA_SR_RNGERRF) != 0u) { ret = WC_HW_E; goto done; } + WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_IN_K + (opsz - 2u)] = 0u; + WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_IN_K + (opsz - 2u) + 1u] = 0u; + if ((ret = Stm32Ccb_PkaWaitFlag(PKA_SR_RNGOKF)) != 0) { goto done; } + if ((ret = Stm32Ccb_SaesWaitBusy()) != 0) { goto done; } + SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | WC_STM32_AES_CR_PHASE_0 | WC_STM32_AES_CR_PHASE_1; + PKA->CLRFR = PKA_CLRFR_CMFC; + SAES->DINR = 0u; + SAES->DINR = WC_CCB_GCM_HDR_LEN(opsz); + SAES->DINR = 0u; + SAES->DINR = cipsz * 32u; + if ((ret = Stm32Ccb_SaesWaitCcf()) != 0) { goto done; } + for (i = 0u; i < 4u; i++) { + tagw[i] = SAES->DOUTR; + } + PKA->CR |= PKA_CR_START; + if ((ret = Stm32Ccb_PkaWaitFlag(PKA_SR_PROCENDF)) != 0) { goto done; } + if ((ret = Stm32Ccb_WaitOpStep(0x1Au)) != 0) { goto done; } + if (WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_OUT_ERROR] != WC_CCB_PKA_OK) { + ret = WC_HW_E; + goto done; + } +#else + /* GCM final phase: feed the length block and read the authentication tag. 
*/ + SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | WC_STM32_AES_CR_PHASE_0 | WC_STM32_AES_CR_PHASE_1; + if ((ret = Stm32Ccb_WaitOpStep(0x0Au)) != 0) { goto done; } + PKA->CLRFR = PKA_CLRFR_CMFC; + SAES->DINR = 0u; + SAES->DINR = WC_CCB_GCM_HDR_LEN(opsz); + SAES->DINR = 0u; + SAES->DINR = cipsz * 32u; + if ((ret = Stm32Ccb_SaesWaitCcf()) != 0) { goto done; } + for (i = 0u; i < 4u; i++) { + tagw[i] = SAES->DOUTR; + } +#endif + + XMEMCPY(iv, ivw, sizeof(ivw)); + XMEMCPY(tag, tagw, sizeof(tagw)); + XMEMCPY(wrapped, wrapw, sizeof(wrapw)); + *wrappedSz = (word32)sizeof(wrapw); + ret = 0; + +done: + Stm32Ccb_Reset(); + SAES->CR &= ~AES_CR_EN; + wolfSSL_CryptHwMutexUnLock(); + /* Derive the public key from the fresh blob (separate locked op). + * pubX/pubY are guaranteed non-NULL by the argument check above. + * This call is made by this function itself, after the HW mutex has been + * released. If it fails, the error is returned although iv, tag, wrapped + * and *wrappedSz have already been written. */ + if (ret == 0) { + ret = Stm32Ccb_ComputePub(iv, tag, wrapped, pubX, pubY); + } + return ret; +} + +/* Bare CCB ECDSA blob-use sign (P-256). Drives the CCB OPSTEP machine + * 0x01 -> 0x12 -> 0x13 -> 0x14 -> 0x16 -> 0x17 -> 0x18 -> 0x19 -> 0x1A. + * iv and tag are read as 16 bytes each and wrapped as 32 bytes (wrappedSz must + * be 32). hashSz must be at least 32; only the first 32 bytes of hash are + * loaded. On success r and s each receive 32 big-endian bytes. + * Returns NOT_COMPILED_IN for any curve other than ECC_SECP256R1, BAD_FUNC_ARG + * for NULL or out-of-range arguments, WC_TIMEOUT_E / WC_HW_E from the hardware + * steps, or the HW mutex lock error. */ +int wc_Stm32_Ccb_EccSign(int curveId, const byte* iv, const byte* tag, + const byte* wrapped, word32 wrappedSz, const byte* hash, word32 hashSz, + byte* r, byte* s) +{ + word32 ivw[4]; + word32 tagw[4]; + word32 wrapw[8]; + word32 opsz; + word32 cipsz; + word32 off; + word32 block; + word32 i; + int ret; + + if (curveId != ECC_SECP256R1) { + return NOT_COMPILED_IN; + } + if (iv == NULL || tag == NULL || wrapped == NULL || wrappedSz != 32u || + hash == NULL || hashSz < 32u || r == NULL || s == NULL) { + return BAD_FUNC_ARG; + } + XMEMCPY(ivw, iv, sizeof(ivw)); + XMEMCPY(tagw, tag, sizeof(tagw)); + XMEMCPY(wrapw, wrapped, sizeof(wrapw)); + + opsz = WC_CCB_P256_OPSZ; + cipsz = WC_CCB_P256_CIPSZ; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + /* CCOP = ECDSA blob-use sign; load the DHUK blob key in decrypt (use) mode. 
*/ + ret = Stm32Ccb_OpBegin(WC_CCB_OP_SIGN_USE, WC_CCB_PKA_SIGN_MODE); + if (ret != 0) { goto done; } + if ((ret = Stm32Ccb_LoadBlobKey(1 /* use */)) != 0) { goto done; } + + /* Hash -> PKA RAM (plain BE->LE words, no terminator, not yet coupled). */ + Stm32Ccb_LoadHash(hash); + + /* Blob-use initial phase: load IV, GCM init, write reference tag. */ + Stm32Ccb_LoadIv(ivw); + SAES->CR |= AES_CR_EN; + if ((ret = Stm32Ccb_SaesWaitCcf()) != 0) { goto done; } + Stm32Ccb_LoadRefTag(tagw); + /* GCM header phase (keep EN) -> OPSTEP 0x12 -> 0x13. */ + SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | WC_STM32_AES_CR_PHASE_0 | AES_CR_EN; + if ((ret = Stm32Ccb_WaitOpStep(0x13u)) != 0) { goto done; } + + /* ECDSA curve params into PKA RAM (each coupled to SAES, wait CCF). */ + if ((ret = Stm32Ccb_LoadCurve()) != 0) { goto done; } + + /* GCM payload phase -> OPSTEP 0x13 -> 0x14. */ + SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | WC_STM32_AES_CR_PHASE_1; + if ((ret = Stm32Ccb_WaitOpStep(0x14u)) != 0) { goto done; } + + /* Feed the wrapped scalar to SAES; the CCB substitutes the decrypted key + * into PKA RAM where the magic value is written (SAES->PKA chaining). */ + block = 0u; + for (off = 0u; off < cipsz; off++) { + SAES->DINR = wrapw[cipsz - 1u - off]; + if ((off % 4u) == 3u) { + if ((ret = Stm32Ccb_SaesWaitCcf()) != 0) { goto done; } + for (i = 0u; i < 4u; i++) { + WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D + block + i] = + WC_CCB_MAGIC; + } + block += 4u; + } + } + WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D + cipsz] = 0u; + WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D + cipsz + 1u] = 0u; + if ((ret = Stm32Ccb_PkaWaitFlag(PKA_SR_DATAOKF)) != 0) { goto done; } + if ((ret = Stm32Ccb_WaitOpStep(0x16u)) != 0) { goto done; } + + /* Per-nonce k drawn by the RNG over CCB (CPU writes placeholders). 
*/ + for (off = 0u; off < (opsz - 2u); off++) { + if ((ret = Stm32Ccb_RngWaitDrdy()) != 0) { goto done; } + WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_IN_K + off] = WC_CCB_FAKE; + } + WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_IN_K + (opsz - 2u)] = 0u; + WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_IN_K + (opsz - 2u) + 1u] = 0u; + if ((ret = Stm32Ccb_PkaWaitFlag(PKA_SR_RNGOKF)) != 0) { goto done; } + if ((ret = Stm32Ccb_WaitOpStep(0x17u)) != 0) { goto done; } + + /* Blob-use final phase: GCM length block + tag-integrity check. */ + if ((ret = Stm32Ccb_GcmFinalTagCheck(opsz, cipsz)) != 0) { goto done; } + if ((ret = Stm32Ccb_WaitOpStep(0x18u)) != 0) { goto done; } + + /* Run the PKA ECDSA signature. */ + PKA->CR |= PKA_CR_START; + if ((ret = Stm32Ccb_WaitOpStep(0x19u)) != 0) { goto done; } + if ((ret = Stm32Ccb_PkaWaitFlag(PKA_SR_PROCENDF)) != 0) { goto done; } + if ((ret = Stm32Ccb_WaitOpStep(0x1Au)) != 0) { goto done; } + + if (WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_OUT_ERROR] != WC_CCB_PKA_OK) { + ret = WC_HW_E; + goto done; + } + wc_stm32_pka_read_be(r, &WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_OUT_SIGNATURE_R], 32u); + wc_stm32_pka_read_be(s, &WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_OUT_SIGNATURE_S], 32u); + ret = 0; + +done: + Stm32Ccb_Reset(); + SAES->CR &= ~AES_CR_EN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} +#endif /* WOLFSSL_STM32_CCB */ + +#endif /* WC_STM32_HAS_DHUK */ +#endif /* WOLFSSL_DHUK */ + + +#elif defined(WOLFSSL_STM32_CUBEMX) + +#if defined(WOLFSSL_DHUK) +/* Wrap an AES key using the DHUK */ +int wc_Stm32_Aes_Wrap(struct Aes* aes, const byte* in, word32 inSz, byte* out, + word32* outSz, const byte* iv, int ivSz) +{ + CRYP_HandleTypeDef hcryp; + int ret = 0; + byte key[AES_256_KEY_SIZE]; + + /* SAES requires use of the RNG -- HAL_RNG_DeInit() calls from random.c + turn off the RNG clock -- re-enable the clock here */ + __HAL_RCC_RNG_CLK_ENABLE(); + ByteReverseWords((word32*)key, (word32*)in, inSz); + XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef)); + if (ret == 0) { + hcryp.Instance = SAES; + 
hcryp.Init.DataType = CRYP_DATATYPE_8B;
+        hcryp.Init.KeySize = CRYP_KEYSIZE_256B;
+        hcryp.Init.DataWidthUnit = CRYP_DATAWIDTHUNIT_BYTE;
+        hcryp.Init.KeySelect = CRYP_KEYSEL_HW; /* use DHUK to unwrap with use */
+        hcryp.Init.KeyMode = CRYP_KEYMODE_WRAPPED;
+        if (iv != NULL) {
+            hcryp.Init.pInitVect = (uint32_t *)iv;
+            hcryp.Init.Algorithm = CRYP_AES_CBC;
+        }
+        else {
+            hcryp.Init.Algorithm = CRYP_AES_ECB;
+        }
+        ret = HAL_CRYP_Init(&hcryp);
+    }
+
+    if (ret == HAL_OK) {
+        ret = HAL_CRYPEx_WrapKey(&hcryp, (uint32_t*)key, (uint32_t*)out, 100);
+        HAL_CRYP_DeInit(&hcryp);
+    }
+    ForceZero(key, sizeof(key));
+
+    ByteReverseWords((word32*)out, (word32*)out, inSz);
+    *outSz = inSz;
+    (void)aes;
+    return ret;
+}
+
+
+#endif
+
+/* Fill in a HAL CRYP handle for an AES operation (CubeMX/HAL build).
+ *
+ * Zeroes *hcryp, maps the key length reported by wc_AesGetKeySize() onto
+ * Init.KeySize, then selects the peripheral instance and the key source.
+ * HAL_CRYP_Init() is not called here; the handle is only populated.
+ *
+ * aes      AES context supplying the key (aes->key) and devId.
+ * hcryp    handle to populate; previous contents are discarded.
+ * useSaes  non-zero to allow the SAES instance. Only honoured when
+ *          WOLFSSL_DHUK is defined and aes->devId is WOLFSSL_DHUK_DEVID
+ *          (hardware key select) or WOLFSSL_SAES_DEVID (aes->key on SAES);
+ *          every other case uses the CRYP instance with aes->key.
+ *
+ * Returns 0 on success, or the error from wc_AesGetKeySize(). */
+int wc_Stm32_Aes_Init(Aes* aes, CRYP_HandleTypeDef* hcryp, int useSaes)
+{
+    int ret;
+    word32 keySize;
+#ifdef STM32_HW_CLOCK_AUTO
+    /* enable the peripheral clock */
+    __HAL_RCC_CRYP_CLK_ENABLE();
+#endif
+
+    /* useSaes is only read inside the WOLFSSL_DHUK branch below; reference it
+     * unconditionally so non-DHUK builds do not warn about an unused
+     * parameter. */
+    (void)useSaes;
+
+    ret = wc_AesGetKeySize(aes, &keySize);
+    if (ret != 0)
+        return ret;
+
+    XMEMSET(hcryp, 0, sizeof(CRYP_HandleTypeDef));
+    /* A key length with no matching case leaves Init.KeySize at the zero
+     * written by the XMEMSET above. */
+    switch (keySize) {
+        case 16: /* 128-bit key */
+            hcryp->Init.KeySize = CRYP_KEYSIZE_128B;
+            break;
+    #ifdef CRYP_KEYSIZE_192B
+        case 24: /* 192-bit key */
+            hcryp->Init.KeySize = CRYP_KEYSIZE_192B;
+            break;
+    #endif
+        case 32: /* 256-bit key */
+            hcryp->Init.KeySize = CRYP_KEYSIZE_256B;
+            break;
+        default:
+            break;
+    }
+
+#ifdef WOLFSSL_DHUK
+    /* Use hardware key */
+    if (useSaes && (aes->devId == WOLFSSL_DHUK_DEVID ||
+                    aes->devId == WOLFSSL_SAES_DEVID)) {
+
+        /* SAES requires use of the RNG -- HAL_RNG_DeInit() calls from
+           random.c turn off the RNG clock -- re-enable the clock here */
+        __HAL_RCC_RNG_CLK_ENABLE();
+
+        hcryp->Instance = SAES;
+        hcryp->Init.DataType = CRYP_DATATYPE_8B;
+
+        /* Key select (HW, or Normal) */
+        if (aes->devId == WOLFSSL_DHUK_DEVID) {
+            hcryp->Init.KeySelect = CRYP_KEYSEL_HW;
+        }
+        else {
+            hcryp->Init.KeySelect = CRYP_KEYSEL_NORMAL;
+            hcryp->Init.KeyMode = CRYP_KEYMODE_NORMAL;
+            hcryp->Init.pKey = (uint32_t*)aes->key;
+        }
+    } else
+#endif
+    {
+        hcryp->Instance = CRYP;
+        hcryp->Init.DataType = CRYP_DATATYPE_8B;
+        hcryp->Init.pKey = (STM_CRYPT_TYPE*)aes->key;
+    }
+#ifdef STM32_HAL_V2
+    hcryp->Init.DataWidthUnit = CRYP_DATAWIDTHUNIT_BYTE;
+    #if defined(CRYP_HEADERWIDTHUNIT_BYTE) && defined(STM_CRYPT_HEADER_WIDTH)
+    hcryp->Init.HeaderWidthUnit =
+        (STM_CRYPT_HEADER_WIDTH == 4) ?
+            CRYP_HEADERWIDTHUNIT_WORD :
+            CRYP_HEADERWIDTHUNIT_BYTE;
+    #endif
+#endif
+
+    return 0;
+}
+
+/* Counterpart of wc_Stm32_Aes_Init(): gates the CRYP peripheral clock off
+ * again when the port manages clocks itself (STM32_HW_CLOCK_AUTO); otherwise
+ * a no-op. */
+void wc_Stm32_Aes_Cleanup(void)
+{
+#ifdef STM32_HW_CLOCK_AUTO
+    /* disable the peripheral clock */
+    __HAL_RCC_CRYP_CLK_DISABLE();
+#endif
+}
+#else /* Standard Peripheral Library */
+
+int wc_Stm32_Aes_Init(Aes* aes, CRYP_InitTypeDef* cryptInit,
+    CRYP_KeyInitTypeDef* keyInit)
+{
+    int ret;
     word32 keySize;
     word32* aes_key;
@@ -2535,7 +4714,242 @@ int wc_Stm32_Aes_Init(Aes* aes, CRYP_InitTypeDef* cryptInit,
 void wc_Stm32_Aes_Cleanup(void)
 {
 }
+
 #endif /* WOLFSSL_STM32_BARE / WOLFSSL_STM32_CUBEMX / StdPeriph */
+
+/* CubeMX/HAL CCB ECDSA port -- placed after the build-branch structure and
+ * guarded on WOLFSSL_STM32_CUBEMX so it compiles only for the HAL build (the
+ * BARE build provides its own wc_Stm32_Ccb_* above). */
+#if defined(WOLFSSL_STM32_CCB) && defined(WOLFSSL_STM32_CUBEMX)
+/* ---------------------------------------------------------------------------
+ * CCB (Coupling and Chaining Bridge) ECDSA -- CubeMX/HAL path (STM32U3).
+ * Implements the wolfSSL CCB port via ST's HAL_CCB_* driver. The DHUK is the
+ * blob encryption key (HAL_CCB_USER_KEY_HW), so the P-256 private scalar never
+ * enters software. The bare-metal counterpart lives in the BARE branch above.
+ * ------------------------------------------------------------------------- */
+
+/* NIST P-256 parameters (big-endian, 32 bytes).
*/
+static const byte ccb_p256_aAbs[32] = { /* |a| = 3; sign passed separately */
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x03};
+static const byte ccb_p256_b[32] = { /* curve coefficient b */
+    0x5a,0xc6,0x35,0xd8,0xaa,0x3a,0x93,0xe7,0xb3,0xeb,0xbd,0x55,0x76,0x98,0x86,0xbc,
+    0x65,0x1d,0x06,0xb0,0xcc,0x53,0xb0,0xf6,0x3b,0xce,0x3c,0x3e,0x27,0xd2,0x60,0x4b};
+static const byte ccb_p256_p[32] = { /* field prime (modulus) p */
+    0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+    0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff};
+static const byte ccb_p256_n[32] = { /* order n of the base point */
+    0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+    0xbc,0xe6,0xfa,0xad,0xa7,0x17,0x9e,0x84,0xf3,0xb9,0xca,0xc2,0xfc,0x63,0x25,0x51};
+static const byte ccb_p256_Gx[32] = { /* base point G, x coordinate */
+    0x6b,0x17,0xd1,0xf2,0xe1,0x2c,0x42,0x47,0xf8,0xbc,0xe6,0xe5,0x63,0xa4,0x40,0xf2,
+    0x77,0x03,0x7d,0x81,0x2d,0xeb,0x33,0xa0,0xf4,0xa1,0x39,0x45,0xd8,0x98,0xc2,0x96};
+static const byte ccb_p256_Gy[32] = { /* base point G, y coordinate */
+    0x4f,0xe3,0x42,0xe2,0xfe,0x1a,0x7f,0x9b,0x8e,0xe7,0xeb,0x4a,0x7c,0x0f,0x9e,0x16,
+    0x2b,0xce,0x33,0x57,0x6b,0x31,0x5e,0xce,0xcb,0xb6,0x40,0x68,0x37,0xbf,0x51,0xf5};
+
+/* Point every field of a HAL CCB curve-parameter struct at the P-256 tables
+ * above. Only pointers and sizes are stored; no table data is copied.
+ * NOTE(review): P-256 has a = -3 (mod p), so coefSignA = 1 with |a| = 3
+ * presumably marks the coefficient as negative -- confirm against the HAL CCB
+ * header. */
+static void Stm32Ccb_SetP256(CCB_ECDSACurveParamTypeDef* p)
+{
+    p->primeOrderSizeByte = 32; /* byte length of n */
+    p->modulusSizeByte = 32;    /* byte length of p */
+    p->coefSignA = 0x00000001u;
+    p->pAbsCoefA = ccb_p256_aAbs;
+    p->pCoefB = ccb_p256_b;
+    p->pModulus = ccb_p256_p;
+    p->pPrimeOrder = ccb_p256_n;
+    p->pPointX = ccb_p256_Gx;
+    p->pPointY = ccb_p256_Gy;
+}
+
+/* Enable the clocks for every peer the CCB chains. HAL_CCB_Init calls the weak
+ * HAL_CCB_MspInit (empty unless the app provides one), so the port enables them
+ * itself -- and random.c's HAL_RNG_DeInit may have gated the RNG clock off.
*/
+static void Stm32Ccb_HalClkEnable(void)
+{
+    __HAL_RCC_CCB_CLK_ENABLE();
+    __HAL_RCC_PKA_CLK_ENABLE();
+    __HAL_RCC_SAES_CLK_ENABLE();
+    __HAL_RCC_RNG_CLK_ENABLE();
+}
+
+/* Wrap a clear P-256 private scalar into a CCB ECDSA blob and derive its
+ * public key, using ST's HAL_CCB_* driver with the hardware key
+ * (HAL_CCB_USER_KEY_HW) as wrapping key.
+ *
+ * curveId    must be ECC_SECP256R1.
+ * d, dLen    clear private scalar; dLen must be 32.
+ * iv, tag    receive 16 bytes each.
+ * wrapped    receives 32 bytes; *wrappedSz is set to 32.
+ * pubX,pubY  handed to the HAL as the public-point output buffers.
+ *
+ * iv, tag, wrapped and *wrappedSz are written only when both HAL steps
+ * succeed. pubX/pubY are written by the HAL directly, so they may have been
+ * touched even when an error is returned.
+ *
+ * Returns 0 on success, NOT_COMPILED_IN for any other curve, BAD_FUNC_ARG on
+ * a NULL pointer or wrong dLen, the wolfSSL_CryptHwMutexLock() error, or
+ * WC_HW_E when a HAL call fails. */
+int wc_Stm32_Ccb_EccMakeBlob(int curveId, const byte* d, word32 dLen,
+    byte* iv, byte* tag, byte* wrapped, word32* wrappedSz,
+    byte* pubX, byte* pubY)
+{
+    CCB_HandleTypeDef hccb;
+    CCB_ECDSACurveParamTypeDef param;
+    CCB_WrappingKeyTypeDef wrap;
+    CCB_ECDSAKeyBlobTypeDef blob;
+    CCB_ECCMulPointTypeDef pub;
+    uint32_t ivW[4];
+    uint32_t tagW[4];
+    uint32_t wrapW[8];
+    int ret = 0;
+
+    if (curveId != ECC_SECP256R1) {
+        return NOT_COMPILED_IN;
+    }
+    if (d == NULL || dLen != 32u || iv == NULL || tag == NULL ||
+        wrapped == NULL || wrappedSz == NULL || pubX == NULL || pubY == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    ret = wolfSSL_CryptHwMutexLock();
+    if (ret != 0) {
+        return ret;
+    }
+    Stm32Ccb_HalClkEnable();
+    XMEMSET(&hccb, 0, sizeof(hccb));
+    hccb.Instance = CCB;
+    if (HAL_CCB_Init(&hccb) != HAL_OK) {
+        wolfSSL_CryptHwMutexUnLock();
+        return WC_HW_E;
+    }
+    /* Zero every HAL argument struct first so a member this port does not set
+     * reaches the driver as 0 rather than stack garbage (hccb and wrap were
+     * already treated this way). */
+    XMEMSET(&param, 0, sizeof(param));
+    Stm32Ccb_SetP256(&param);
+    XMEMSET(&wrap, 0, sizeof(wrap));
+    wrap.WrappingKeyType = HAL_CCB_USER_KEY_HW;
+    XMEMSET(&blob, 0, sizeof(blob));
+    /* The HAL writes the blob into word-typed scratch buffers; they are
+     * copied to the caller's byte buffers only after both steps succeed. */
+    blob.pIV = ivW;
+    blob.pTag = tagW;
+    blob.pWrappedKey = wrapW;
+
+    if (HAL_CCB_ECDSA_WrapPrivateKey(&hccb, &param, d, &wrap, &blob) != HAL_OK
+            || hccb.State != HAL_CCB_STATE_READY) {
+        ret = WC_HW_E;
+        goto out;
+    }
+    XMEMSET(&pub, 0, sizeof(pub));
+    pub.pPointX = pubX;
+    pub.pPointY = pubY;
+    if (HAL_CCB_ECDSA_ComputePublicKey(&hccb, &param, &wrap, &blob, &pub)
+            != HAL_OK) {
+        ret = WC_HW_E;
+        goto out;
+    }
+    XMEMCPY(iv, ivW, sizeof(ivW));
+    XMEMCPY(tag, tagW, sizeof(tagW));
+    XMEMCPY(wrapped, wrapW, sizeof(wrapW));
+    *wrappedSz = (word32)sizeof(wrapW);
+
+out:
+    (void)HAL_CCB_DeInit(&hccb);
+    wolfSSL_CryptHwMutexUnLock();
+    return ret;
+}
+
+/* ECDSA-sign a hash with a CCB blob via HAL_CCB_ECDSA_Sign().
+ *
+ * curveId          must be ECC_SECP256R1.
+ * iv, tag          blob IV and tag; 16 bytes are read from each.
+ * wrapped          wrapped scalar; wrappedSz must be 32.
+ * hash, hashSz     digest; hashSz must be at least 32. The length itself is
+ *                  not forwarded to the HAL.
+ * r, s             handed to the HAL as the signature output buffers.
+ *
+ * Returns 0 on success, NOT_COMPILED_IN for any other curve, BAD_FUNC_ARG on
+ * a NULL pointer or bad length, the wolfSSL_CryptHwMutexLock() error, or
+ * WC_HW_E when a HAL call fails. */
+int wc_Stm32_Ccb_EccSign(int curveId, const byte* iv, const byte* tag,
+    const byte* wrapped, word32 wrappedSz, const byte* hash, word32 hashSz,
+    byte* r, byte* s)
+{
+    CCB_HandleTypeDef hccb;
+    CCB_ECDSACurveParamTypeDef param;
+    CCB_WrappingKeyTypeDef wrap;
+    CCB_ECDSAKeyBlobTypeDef blob;
+    CCB_ECDSASignTypeDef sig;
+    uint32_t ivW[4];
+    uint32_t tagW[4];
+    uint32_t wrapW[8];
+    int ret = 0;
+
+    if (curveId != ECC_SECP256R1) {
+        return NOT_COMPILED_IN;
+    }
+    if (iv == NULL || tag == NULL || wrapped == NULL || wrappedSz != 32u ||
+        hash == NULL || hashSz < 32u || r == NULL || s == NULL) {
+        return BAD_FUNC_ARG;
+    }
+    /* Stage the byte-typed blob parts in word-typed buffers for the HAL. */
+    XMEMCPY(ivW, iv, sizeof(ivW));
+    XMEMCPY(tagW, tag, sizeof(tagW));
+    XMEMCPY(wrapW, wrapped, sizeof(wrapW));
+
+    ret = wolfSSL_CryptHwMutexLock();
+    if (ret != 0) {
+        return ret;
+    }
+    Stm32Ccb_HalClkEnable();
+    XMEMSET(&hccb, 0, sizeof(hccb));
+    hccb.Instance = CCB;
+    if (HAL_CCB_Init(&hccb) != HAL_OK) {
+        wolfSSL_CryptHwMutexUnLock();
+        return WC_HW_E;
+    }
+    /* Zero the HAL argument structs before filling them (see EccMakeBlob). */
+    XMEMSET(&param, 0, sizeof(param));
+    Stm32Ccb_SetP256(&param);
+    XMEMSET(&wrap, 0, sizeof(wrap));
+    wrap.WrappingKeyType = HAL_CCB_USER_KEY_HW;
+    XMEMSET(&blob, 0, sizeof(blob));
+    blob.pIV = ivW;
+    blob.pTag = tagW;
+    blob.pWrappedKey = wrapW;
+    XMEMSET(&sig, 0, sizeof(sig));
+    sig.pRSign = r;
+    sig.pSSign = s;
+    if (HAL_CCB_ECDSA_Sign(&hccb, &param, &wrap, &blob, (uint8_t*)hash, &sig)
+            != HAL_OK) {
+        ret = WC_HW_E;
+    }
+    (void)HAL_CCB_DeInit(&hccb);
+    wolfSSL_CryptHwMutexUnLock();
+    return ret;
+}
+
+#if defined(WOLF_CRYPTO_CB) && defined(HAVE_ECC) && defined(HAVE_ECC_SIGN)
+/* CubeMX CCB crypto-callback device. Transparent DHUK AES/GMAC is bare-only, so
+ * under the HAL build the CCB-protected ECDSA sign is the only transparent DHUK
+ * operation. This minimal device routes WC_PK_TYPE_ECDSA_SIGN for a CCB key
+ * (key->dhuk_is_ccb) to the HAL CCB sign and returns the DER-encoded (r,s); it
+ * also forwards WC_PK_TYPE_EC_KEYGEN to wc_ecc_dev_make_key(). It
+ * mirrors the bare-metal device's CCB branch so the same wc_ecc_sign_hash flow
+ * works on both build paths.
*/
+/* devId and ctx are unused.
+ *
+ * Returns CRYPTOCB_UNAVAILABLE for anything that is not a PK keygen or an
+ * ECDSA sign on a CCB-flagged key, the wc_ecc_dev_make_key() result for
+ * keygen, NOT_COMPILED_IN for a CCB key whose curve size is not 32 bytes,
+ * otherwise the result of the HAL sign followed by wc_ecc_rs_raw_to_sig(). */
+static int Stm32Ccb_CryptoDevCb(int devId, struct wc_CryptoInfo* info,
+    void* ctx)
+{
+    ecc_key* key;
+    byte r[MAX_ECC_BYTES];
+    byte s[MAX_ECC_BYTES];
+    word32 sz;
+    int ret;
+
+    (void)devId;
+    (void)ctx;
+    if (info == NULL || info->algo_type != WC_ALGO_TYPE_PK) {
+        return CRYPTOCB_UNAVAILABLE;
+    }
+    /* Transparent provisioning: wc_ecc_make_key() on a WC_DHUK_DEVID key binds
+     * a fresh CCB-protected blob to it (no CCB-specific API). */
+    if (info->pk.type == WC_PK_TYPE_EC_KEYGEN) {
+        return wc_ecc_dev_make_key(info->pk.eckg.rng, info->pk.eckg.size,
+                                   info->pk.eckg.key, info->pk.eckg.curveId);
+    }
+    if (info->pk.type != WC_PK_TYPE_ECDSA_SIGN) {
+        return CRYPTOCB_UNAVAILABLE;
+    }
+    key = info->pk.eccsign.key;
+    if (key == NULL || key->dhuk_is_ccb == 0u) {
+        return CRYPTOCB_UNAVAILABLE;
+    }
+    sz = (word32)wc_ecc_size(key);
+    /* The CCB sign below is hard-wired to P-256 and fills exactly 32 bytes of
+     * r and s. Refuse any key whose coordinate size differs, so the encode
+     * step never reads the uninitialised tail of r/s (or zero bytes when the
+     * key has no curve set). Same error code wc_Stm32_Ccb_EccSign() uses for
+     * an unsupported curve. */
+    if (sz != 32u) {
+        return NOT_COMPILED_IN;
+    }
+    ret = wc_Stm32_Ccb_EccSign(ECC_SECP256R1, key->ccb_iv, key->ccb_tag,
+                               key->dhuk_wrapped_priv,
+                               key->dhuk_wrapped_priv_len,
+                               info->pk.eccsign.in, info->pk.eccsign.inlen,
+                               r, s);
+    if (ret == 0) {
+        ret = wc_ecc_rs_raw_to_sig(r, sz, s, sz,
+                                   info->pk.eccsign.out,
+                                   info->pk.eccsign.outlen);
+    }
+    /* Scrub the raw signature halves from the stack on every path. */
+    ForceZero(r, sizeof(r));
+    ForceZero(s, sizeof(s));
+    return ret;
+}
+
+/* Register / unregister the STM32 DHUK/CCB device for the CubeMX build. Same
+ * name and contract as the bare-metal version so callers are build-agnostic.
*/
+/* Registers Stm32Ccb_CryptoDevCb under devId with a NULL context pointer and
+ * returns the wc_CryptoCb_RegisterDevice() result unchanged. */
+int wc_Stm32_DhukRegister(int devId)
+{
+    return wc_CryptoCb_RegisterDevice(devId, Stm32Ccb_CryptoDevCb, NULL);
+}
+
+/* Removes the crypto-callback device registered under devId. */
+void wc_Stm32_DhukUnRegister(int devId)
+{
+    wc_CryptoCb_UnRegisterDevice(devId);
+}
+#endif /* WOLF_CRYPTO_CB && HAVE_ECC && HAVE_ECC_SIGN */
+#endif /* WOLFSSL_STM32_CCB && WOLFSSL_STM32_CUBEMX */
 #endif /* !NO_AES */
 #endif /* STM32_CRYPTO */
 
@@ -2660,10 +5074,8 @@ static int stm32_get_from_hexstr(const char* hex, uint8_t* dst, int sz)
     return stm32_getabs_from_hexstr(hex, dst, sz, NULL);
 }
 
-/* STM32 PKA supports up to 640-bit numbers */
-#ifndef STM32_MAX_ECC_SIZE
-#define STM32_MAX_ECC_SIZE (80)
-#endif
+/* STM32 PKA supports up to 640-bit numbers; STM32_MAX_ECC_SIZE is defined in
+ * wolfssl/wolfcrypt/port/st/stm32.h. */
 
 #ifdef WOLFSSL_STM32_PKA_V2
 /* find curve based on prime/modulus and return order/coefB */
diff --git a/wolfssl/wolfcrypt/ecc.h b/wolfssl/wolfcrypt/ecc.h
index bfb4ba86791..970ec1d9be2 100644
--- a/wolfssl/wolfcrypt/ecc.h
+++ b/wolfssl/wolfcrypt/ecc.h
@@ -623,6 +623,38 @@ struct ecc_key {
 #ifdef WC_ECC_NONBLOCK
     ecc_nb_ctx_t* nb_ctx;
 #endif
+#ifdef WOLFSSL_DHUK
+    /* DHUK ECC sign: the ECC private scalar, AES-encrypted with the device key
+     * that the SAES derives (from the 256-bit seed below) inside the hardware.
+     * At sign time it is decrypted into a short-lived buffer; the device key
+     * never enters software.
+     * - dhuk_wrapped_priv -- the wrapped scalar. Length is a multiple of
+     * 16; 96 bytes covers P-521 (66 padded to 80) plus headroom.
+     * - dhuk_seed -- 256-bit derivation seed (mixed with the
+     * silicon DHUK to derive the unwrap key).
+     * - dhuk_wrapped_priv_len -- wrapped blob length.
+     * - dhuk_plain_priv_len -- actual scalar size in bytes (32 P-256,
+     * 48 P-384, 66 P-521).
+     * - dhuk_seed_sz -- seed length (must be 32).
+     * Set via wc_ecc_import_wrapped_private(); enable the device by setting
+     * devId at init (wc_ecc_init_ex(&key, heap, WC_DHUK_DEVID)).
*/ + byte dhuk_wrapped_priv[96]; + byte dhuk_seed[32]; + word32 dhuk_wrapped_priv_len; + word32 dhuk_plain_priv_len; + word32 dhuk_seed_sz; +#ifdef WOLFSSL_STM32_CCB + /* CCB (Coupling and Chaining Bridge) ECDSA blob. The wrapped scalar reuses + * dhuk_wrapped_priv (+ dhuk_wrapped_priv_len); the AES-GCM blob IV and tag + * are here; the public key is the standard key->pubkey. dhuk_is_ccb selects + * the CCB sign path in the crypto callback. Provisioned on-device by the + * standard wc_ecc_make_key() (intercepted in the crypto callback) or loaded + * via wc_ecc_import_wrapped_private_ex(). */ + byte ccb_iv[16]; + byte ccb_tag[16]; + byte dhuk_is_ccb; +#endif +#endif }; #ifndef WOLFSSL_ECC_BLIND_K @@ -731,6 +763,55 @@ int wc_ecc_sign_hash(const byte* in, word32 inlen, byte* out, word32 *outlen, WOLFSSL_API int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng, ecc_key* key, mp_int *r, mp_int *s); +#if defined(WOLFSSL_DHUK) && defined(WOLFSSL_STM32_BARE) && \ + defined(WC_STM32_HAS_DHUK) && \ + defined(WOLFSSL_STM32_PKA) && !defined(WC_STM32_PKA_VERIFY_ONLY) +/* DHUK ECC sign: import a hardware-wrapped ECC private scalar + its derivation + * seed onto the ecc_key for the crypto-callback sign path. The caller MUST also + * populate key->pubkey (via wc_ecc_import_x963) so verify can use the + * in-clear public counterpart, and enable the device by setting devId at init + * (wc_ecc_init_ex(&key, heap, WC_DHUK_DEVID)). + * seed -- 256-bit derivation seed (mixed with the silicon DHUK to + * derive the key that unwraps the scalar) + * seedSz -- seed length, must be 32 + * wrapped -- ECC scalar AES-encrypted with the SAES-derived device key; + * length is a multiple of 16, <= 96 + * wrappedLen -- length of the wrapped blob + * plainLen -- actual scalar size (e.g. 32 for P-256) + * + * On success: stores seed + blob + lengths, returns 0 (does NOT set devId). + * On failure: BAD_FUNC_ARG. 
*/ +WOLFSSL_API +int wc_ecc_import_wrapped_private(ecc_key* key, const byte* seed, word32 seedSz, + const byte* wrapped, word32 wrappedLen, + word32 plainLen); +#endif + +#if defined(WOLFSSL_DHUK) && defined(WOLFSSL_STM32_CCB) +/* STM32 CCB (Coupling and Chaining Bridge) ECDSA, HW DHUK->PKA. The private + * scalar is wrapped in an AES-GCM "blob" that only the device's CCB can unwrap + * into the PKA -- it never enters software. + * + * Provisioning is transparent through the standard ECC API: init the key with + * WC_DHUK_DEVID (wc_ecc_init_ex(&key, heap, WC_DHUK_DEVID)) and call the normal + * wc_ecc_make_key() -- the STM32 crypto callback intercepts keygen and binds a + * fresh device blob to the key (no CCB-specific public API). wc_ecc_sign_hash() + * then signs through the same callback. + * + * wc_ecc_import_wrapped_private_ex: restore a previously provisioned blob (the + * wrapped scalar + AES-GCM iv/tag + public key) onto a WC_DHUK_DEVID key. */ +WOLFSSL_API +int wc_ecc_import_wrapped_private_ex(ecc_key* key, int curve_id, + const byte* wrapped, word32 wrappedLen, + const byte* iv, word32 ivLen, + const byte* tag, word32 tagLen, + const byte* pub, word32 pubLen); +/* Internal: crypto-callback keygen handler -- binds a fresh device blob to the + * key using the supplied rng. Not a public entry point; callers reach it via + * wc_ecc_make_key() on a WC_DHUK_DEVID key. 
*/ +WOLFSSL_LOCAL +int wc_ecc_dev_make_key(WC_RNG* rng, int keysize, ecc_key* key, int curve_id); +#endif #if defined(WOLFSSL_ECDSA_DETERMINISTIC_K) || \ defined(WOLFSSL_ECDSA_DETERMINISTIC_K_VARIANT) WOLFSSL_API diff --git a/wolfssl/wolfcrypt/port/st/stm32.h b/wolfssl/wolfcrypt/port/st/stm32.h index 54784dd6181..0f9507f9824 100644 --- a/wolfssl/wolfcrypt/port/st/stm32.h +++ b/wolfssl/wolfcrypt/port/st/stm32.h @@ -40,6 +40,18 @@ #define WC_STM32_PKA_VERIFY_ONLY #endif +/* STM32C5: the protected ECDSA SIGN (mode 0x24) works on the HW PKA (armed via + * wc_stm32_pka_arm_mode), but the plain ECDSA VERIFY (mode 0x26) has an + * unresolved wolfSSL-context failure (it returns OUT_RESULT=0 for a known-good + * signature, while the bare-metal probe verifies the same operands correctly). + * ECDSA verify is a public operation (no secret), so route it to software while + * keeping HW sign. Sign-only is the mirror of verify-only above. + * Define WC_STM32_PKA_SIGN_ONLY yourself to force this on any part. */ +#if defined(WOLFSSL_STM32_PKA) && defined(WOLFSSL_STM32C5) && \ + !defined(WC_STM32_PKA_SIGN_ONLY) && !defined(WC_STM32_PKA_VERIFY_ONLY) + #define WC_STM32_PKA_SIGN_ONLY +#endif + #ifdef WOLFSSL_STM32_BARE /* Per-family direct-register clock-enable macros. CMSIS device header is * already included via settings.h. RCC->...ENR bit names come from CMSIS. @@ -160,6 +172,13 @@ #define WC_STM32_HASH_CLK_DISABLE() \ WC_STM32_CLK_DIS(AHB2ENR1, RCC_AHB2ENR1_HASHEN) #endif + /* CCB (Coupling and Chaining Bridge) clock -- U3 only. 
*/ + #ifdef RCC_AHB2ENR1_CCBEN + #define WC_STM32_CCB_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR1, RCC_AHB2ENR1_CCBEN) + #define WC_STM32_CCB_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR1, RCC_AHB2ENR1_CCBEN) + #endif #define WC_STM32_RNG_CLK_ENABLE() \ WC_STM32_CLK_EN(AHB2ENR1, RCC_AHB2ENR1_RNGEN) #elif defined(WOLFSSL_STM32G0) @@ -238,6 +257,13 @@ WC_STM32_CLK_DIS(AHB2ENR, RCC_AHB2ENR_HASHEN) #define WC_STM32_RNG_CLK_ENABLE() \ WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_RNGEN) + /* CCB (Coupling and Chaining Bridge) clock -- C5 (un-banked AHB2ENR). */ + #ifdef RCC_AHB2ENR_CCBEN + #define WC_STM32_CCB_CLK_ENABLE() \ + WC_STM32_CLK_EN(AHB2ENR, RCC_AHB2ENR_CCBEN) + #define WC_STM32_CCB_CLK_DISABLE() \ + WC_STM32_CLK_DIS(AHB2ENR, RCC_AHB2ENR_CCBEN) + #endif #elif defined(WOLFSSL_STM32U0) /* U0: Cortex-M0+ low-end. AES + RNG only (no SAES, no HASH, no PKA, * no CRYP). Both on the single AHBENR. TinyAES IP, KEYSIZE field @@ -772,6 +798,117 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, #endif /* STM32_CRYPTO */ +/* DHUK (Device Hardware Unique Key) -- SAES key wrap / unwrap using a + * silicon-bound key. Originally introduced for STM32U5 only; the + * underlying SAES + DHUK infrastructure is also present on U3, H5, + * WBA, and C5. Use WOLFSSL_DHUK going forward; WOLFSSL_STM32U5_DHUK + * is kept as a backwards-compatible alias for one release cycle. */ +#if defined(WOLFSSL_STM32U5_DHUK) && !defined(WOLFSSL_DHUK) + #define WOLFSSL_DHUK +#endif +#if defined(WOLFSSL_DHUK) && !defined(WOLFSSL_STM32U5_DHUK) + #define WOLFSSL_STM32U5_DHUK +#endif + +/* Family gate: only families that actually have SAES + DHUK silicon. + * L5 has a "secure AES" instance but its CR layout does not include + * KMOD / KEYSEL fields -- it does not implement the same DHUK key- + * wrap protocol as U5/U3/H5/WBA/C5. L5 is intentionally excluded. 
*/ +#if defined(WOLFSSL_DHUK) && \ + (defined(WOLFSSL_STM32U5) || defined(WOLFSSL_STM32U3) || \ + defined(WOLFSSL_STM32H5) || defined(WOLFSSL_STM32WBA) || \ + defined(WOLFSSL_STM32C5) || defined(WOLFSSL_STM32H7S)) + #define WC_STM32_HAS_DHUK +#endif + +/* CCB (Coupling and Chaining Bridge) gate: STM32U3 (e.g. U385, RM0487 ch 31) + * and STM32C5 (e.g. C5A3, RM0522) carry the CCB peripheral that chains + * PKA <-> SAES <-> RNG over a local interconnect, so a DHUK-protected private + * key is used by the PKA without ever entering software / crossing the system + * bus. The shared bare OPSTEP state machine handles both; the family + * differences are the RCC reset-register names (WC_STM32_CCB_* below) and the + * SAES GCM-phase field name (CPHASE on C5 vs GCMPH on U3, abstracted via + * WC_STM32_AES_CR_PHASE below). STM32H5 also has CCB but is not enabled here. + * U5 / WBA do not have CCB. */ +#if defined(WOLFSSL_DHUK) && \ + (defined(WOLFSSL_STM32U3) || defined(WOLFSSL_STM32C5)) + #define WC_STM32_HAS_CCB +#endif + +/* CCB register-name differences across families, so the bare CCB OPSTEP driver + * stays family-neutral (the state machine is shared; only these names differ). + * U3 uses the banked AHB2*1 RCC names + CCB_SR_BUSY; C5 uses the un-banked + * AHB2* names + CCB_SR_CCB_BUSY. 
*/ +#ifdef WC_STM32_HAS_CCB + #if defined(WOLFSSL_STM32C5) + #define WC_STM32_CCB_SR_BUSY CCB_SR_CCB_BUSY + #define WC_STM32_CCB_RSTR AHB2RSTR + #define WC_STM32_CCB_RST_PKA RCC_AHB2RSTR_PKARST + #define WC_STM32_CCB_RST_SAES RCC_AHB2RSTR_SAESRST + #define WC_STM32_CCB_RST_RNG RCC_AHB2RSTR_RNGRST + #else /* WOLFSSL_STM32U3 */ + #define WC_STM32_CCB_SR_BUSY CCB_SR_BUSY + #define WC_STM32_CCB_RSTR AHB2RSTR1 + #define WC_STM32_CCB_RST_PKA RCC_AHB2RSTR1_PKARST + #define WC_STM32_CCB_RST_SAES RCC_AHB2RSTR1_SAESRST + #define WC_STM32_CCB_RST_RNG RCC_AHB2RSTR1_RNGRST + #endif + + /* SAES GCM/chaining-phase field in AES_CR (bits 14:13): the STM32C5 CMSIS + * names it CPHASE; U3 and the other CRYP/SAES parts name it GCMPH. Same bit + * positions and values, name only. Self-selects on symbol presence. */ + #ifdef AES_CR_CPHASE + #define WC_STM32_AES_CR_PHASE AES_CR_CPHASE + #define WC_STM32_AES_CR_PHASE_0 AES_CR_CPHASE_0 + #define WC_STM32_AES_CR_PHASE_1 AES_CR_CPHASE_1 + #else + #define WC_STM32_AES_CR_PHASE AES_CR_GCMPH + #define WC_STM32_AES_CR_PHASE_0 AES_CR_GCMPH_0 + #define WC_STM32_AES_CR_PHASE_1 AES_CR_GCMPH_1 + #endif +#endif + +/* WOLFSSL_STM32_CCB opts in to the CCB-protected ECDSA path (on-device blob + * create + use). Supported on both build paths: the bare-metal direct-register + * driver (WOLFSSL_STM32_BARE) and the CubeMX/HAL path (WOLFSSL_STM32_CUBEMX, + * via ST's HAL_CCB_* driver). Requires CCB silicon (STM32U3 or STM32C5). */ +#if defined(WOLFSSL_STM32_CCB) + #if !defined(WOLFSSL_STM32_BARE) && !defined(WOLFSSL_STM32_CUBEMX) + #error "WOLFSSL_STM32_CCB requires WOLFSSL_STM32_BARE or WOLFSSL_STM32_CUBEMX" + #endif + #if !defined(WC_STM32_HAS_CCB) + #error "WOLFSSL_STM32_CCB requires CCB silicon (STM32U3/U385 or STM32C5/C5A3)" + #endif +#endif + +/* Per-coordinate scratch size for the PKA/CCB ECDSA operand buffers (bytes). + * Sized for the largest supported curve plus PKA padding headroom. 
Defined + * here so the PKA and point-op TUs in stm32.c share one value. */ +#ifndef STM32_MAX_ECC_SIZE + #define STM32_MAX_ECC_SIZE (80) +#endif + +/* Transparent DHUK crypto flows through the crypto-callback framework, so + * WOLF_CRYPTO_CB is mandatory whenever DHUK is enabled. */ +#if defined(WOLFSSL_DHUK) && !defined(WOLF_CRYPTO_CB) + #error "WOLFSSL_DHUK requires WOLF_CRYPTO_CB (crypto callback dispatch)" +#endif + +#if defined(WOLFSSL_DHUK) && !defined(WOLFSSL_DHUK_DEVID) + /* SAES / DHUK device IDs. wc_Stm32_Aes_Wrap selects the wrap-key source + * by aes->devId (HW DHUK vs a software key). Transparent DHUK crypto + * routes through the crypto-callback device registered at WC_DHUK_DEVID + * (see wc_Stm32_DhukRegister), not these markers. */ + #define WOLFSSL_DHUK_DEVID 808 + #define WOLFSSL_SAES_DEVID 807 + /* Crypto-callback device id for transparent DHUK crypto (same value as the + * SAES/DHUK marker; override before include if it collides). */ + #ifndef WC_DHUK_DEVID + #define WC_DHUK_DEVID 808 + #endif + + int wc_Stm32_Aes_Wrap(struct Aes* aes, const byte* in, word32 inSz, byte* out, + word32* outSz, const byte* iv, int ivSz); #ifdef WOLFSSL_STM32_BARE /* Optional exact-key import primitive: unwrap a DHUK-wrapped key into SAES * KEYR and ECB/CBC with it. _ex `isCbc`: 0=ECB, 1=CBC. Returns @@ -785,6 +922,67 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, #endif #if defined(WOLFSSL_STM32_PKA) && defined(HAVE_ECC) +struct ecc_key; +struct WC_RNG; + +int stm32_ecc_verify_hash_ex(MATH_INT_T *r, MATH_INT_T *s, const byte* hash, + word32 hashlen, int* res, struct ecc_key* key); + +int stm32_ecc_sign_hash_ex(const byte* hash, word32 hashlen, struct WC_RNG* rng, + struct ecc_key* key, MATH_INT_T *r, MATH_INT_T *s); +#endif /* WOLFSSL_STM32_PKA && HAVE_ECC */ + + +/* DHUK BARE port: the STM32 crypto-callback device. 
Built on families with + * SAES + DHUK (the WC_STM32_HAS_DHUK gate); transparent DHUK crypto (AES / + * GMAC / ECDSA) routes through it via the cryptocb path. */ +#if defined(WOLFSSL_STM32_BARE) && defined(WC_STM32_HAS_DHUK) + +#ifdef WOLF_CRYPTO_CB + /* Register / unregister the STM32 DHUK device. After registering at + * WC_DHUK_DEVID, set an object's devId to it at init + * (wc_AesInit / wc_ecc_init_ex) and supply the 256-bit seed as the key + * (wc_AesGcmSetKey) or via wc_ecc_import_wrapped_private(). */ + int wc_Stm32_DhukRegister(int devId); + void wc_Stm32_DhukUnRegister(int devId); +#endif + +#endif /* WOLFSSL_STM32_BARE && WC_STM32_HAS_DHUK */ + +/* CubeMX CCB build: DHUK AES/GMAC is bare-only, but the CCB-protected ECDSA + * sign routes through the crypto-callback device too, so expose the same + * register/unregister entry points under the HAL build. */ +#if defined(WOLFSSL_STM32_CUBEMX) && defined(WOLFSSL_STM32_CCB) && \ + defined(WOLF_CRYPTO_CB) + int wc_Stm32_DhukRegister(int devId); + void wc_Stm32_DhukUnRegister(int devId); +#endif + +/* CCB (Coupling and Chaining Bridge) HW-protected DHUK->PKA ECDSA -- STM32U3 + * (e.g. U385). Available on both build paths: WOLFSSL_STM32_BARE (direct + * register driver) and WOLFSSL_STM32_CUBEMX (ST HAL_CCB_* driver). The blob is + * an AES-GCM authenticated wrap of the ECC private scalar under the CCB-active + * DHUK; the scalar never enters software. Currently P-256 (ECC_SECP256R1). */ +#ifdef WOLFSSL_STM32_CCB + /* Bare-only: bring up the CCB and report usability (clocks + IPRST + BUSY + * clear, no OPERR). Returns 0 on success. */ + int wc_Stm32_CcbInit(void); + + /* Create a CCB ECDSA-signature blob from a clear private scalar d (and its + * derived public key) on-device. The scalar is wrapped under the DHUK; the + * blob (iv[16] + tag[16] + wrapped d) and public key (pubX[32]/pubY[32]) + * are returned. The HW self-verifies the blob before returning. 
*/ + int wc_Stm32_Ccb_EccMakeBlob(int curveId, const byte* d, word32 dLen, + byte* iv, byte* tag, byte* wrapped, word32* wrappedSz, + byte* pubX, byte* pubY); + + /* Sign hash with a CCB ECDSA blob. The scalar is unwrapped inside the + * hardware (SAES->PKA over the CCB local bus) and never enters software. + * r[32]/s[32] receive the signature. */ + int wc_Stm32_Ccb_EccSign(int curveId, const byte* iv, const byte* tag, + const byte* wrapped, word32 wrappedSz, const byte* hash, word32 hashSz, + byte* r, byte* s); +#endif #endif /* _WOLFPORT_STM32_H_ */ From e215de093d94ad1c2a7bfab00a8f63ef21cfba8c Mon Sep 17 00:00:00 2001 From: David Garske Date: Thu, 25 Jun 2026 12:38:11 -0700 Subject: [PATCH 3/3] Add STM32 CubeMX DHUK/CCB support, docs and matrix --- .wolfssl_known_macro_extras | 6 +- doc/dox_comments/header_files/ecc.h | 10 +- doc/dox_comments/header_files/stm32.h | 30 +- wolfcrypt/src/aes.c | 8 +- wolfcrypt/src/ecc.c | 122 +-- wolfcrypt/src/port/st/README.md | 20 +- wolfcrypt/src/port/st/stm32.c | 1064 +++++++++++++++++++++---- wolfcrypt/src/random.c | 16 +- wolfssl/wolfcrypt/ecc.h | 9 +- wolfssl/wolfcrypt/port/st/stm32.h | 18 +- 10 files changed, 1073 insertions(+), 230 deletions(-) diff --git a/.wolfssl_known_macro_extras b/.wolfssl_known_macro_extras index 2a2c9668038..3e05dfb9769 100644 --- a/.wolfssl_known_macro_extras +++ b/.wolfssl_known_macro_extras @@ -1,6 +1,7 @@ AES AES1 AES_CR_CCFC +AES_CR_CPHASE AES_GCM_GMULT_NCT AES_ICR_CCF AES_ISR_CCF @@ -525,6 +526,7 @@ RCC_AHB2ENR1_HASHEN RCC_AHB2ENR1_PKAEN RCC_AHB2ENR1_SAESEN RCC_AHB2ENR_AESEN +RCC_AHB2ENR_CCBEN RCC_AHB2ENR_HASHEN RCC_AHB2ENR_PKAEN RCC_AHB2ENR_SAESEN @@ -987,8 +989,8 @@ WOLFSSL_STM32C5 WOLFSSL_STM32F3 WOLFSSL_STM32F427_RNG WOLFSSL_STM32U0 -WOLFSSL_STM32_CCB WOLFSSL_STM32_DHUK_UNWRAP +WOLFSSL_STM32_RNG_LEGACY_FAILFAST WOLFSSL_STM32_USE_SAES WOLFSSL_STRONGEST_HASH_SIG WOLFSSL_STSAFE_TAKES_SLOT @@ -1115,6 +1117,8 @@ __FreeBSD__ __GLIBC__ __GNUC_MINOR__ __GNUC__ +__HAL_RCC_AES_CLK_ENABLE 
+__HAL_RCC_SAES_CLK_ENABLE __HP_cc __IAR_SYSTEMS_ICC__ __ICCARM__ diff --git a/doc/dox_comments/header_files/ecc.h b/doc/dox_comments/header_files/ecc.h index e103555780b..9bb6ba33051 100644 --- a/doc/dox_comments/header_files/ecc.h +++ b/doc/dox_comments/header_files/ecc.h @@ -1291,10 +1291,12 @@ int wc_ecc_import_private_key(const byte* priv, word32 privSz, const byte* pub, a chip-bound wrapped blob together with the 256-bit derivation seed; the plaintext scalar is never imported. The key must be bound to the STM32 DHUK crypto-callback device (init with wc_ecc_init_ex(&key, heap, WC_DHUK_DEVID) - after registering the device with wc_Stm32_DhukRegister). Available only on - STM32 bare-metal builds (WOLFSSL_STM32_BARE) with WOLFSSL_DHUK, a - DHUK-capable SAES (WC_STM32_HAS_DHUK), and HW PKA signing enabled - (WOLFSSL_STM32_PKA, not WC_STM32_PKA_VERIFY_ONLY). + after registering the device with wc_Stm32_DhukRegister). Available on the + STM32 bare-metal (WOLFSSL_STM32_BARE) and CubeMX/HAL (WOLFSSL_STM32_CUBEMX) + build paths with WOLFSSL_DHUK. This setter is a pure import (no STM32 PKA + dependency, and is used on the DHUK-capable SAES families); the matching + ECDSA sign uses the HW PKA, so signing additionally requires + WOLFSSL_STM32_PKA (and not WC_STM32_PKA_VERIFY_ONLY). \return 0 Returned on success. \return BAD_FUNC_ARG Returned if key, seed, or wrapped is NULL; if seedSz is diff --git a/doc/dox_comments/header_files/stm32.h b/doc/dox_comments/header_files/stm32.h index 40ccbf8645e..c8cfecdd455 100644 --- a/doc/dox_comments/header_files/stm32.h +++ b/doc/dox_comments/header_files/stm32.h @@ -5,10 +5,11 @@ crypto-callback device. 
After registering at WC_DHUK_DEVID, bind an object to the device by setting its devId at init (wc_AesInit / wc_ecc_init_ex) and supply the per-key 256-bit seed as the key (wc_AesGcmSetKey) or via - wc_ecc_import_wrapped_private; normal wolfCrypt AES / GMAC / ECDSA calls then - run transparently with the working key derived inside the SAES. Available on - STM32 builds with WOLFSSL_DHUK, WOLF_CRYPTO_CB, and a DHUK-capable SAES - (WC_STM32_HAS_DHUK); on the CubeMX path it is also provided for CCB ECDSA. + wc_ecc_import_wrapped_private; normal wolfCrypt AES / GMAC / ECDSA calls + then run transparently with the working key derived inside the SAES. + Available on STM32 builds with WOLFSSL_DHUK, WOLF_CRYPTO_CB, and a + DHUK-capable SAES (WC_STM32_HAS_DHUK); on the CubeMX path it is also + provided for CCB ECDSA. \return 0 Returned on success. \return <0 A negative error code is returned if device registration fails. @@ -35,8 +36,8 @@ int wc_Stm32_DhukRegister(int devId); \ingroup STM32 \brief This function unregisters the STM32 DHUK crypto-callback device that - was registered with wc_Stm32_DhukRegister. Call it once transparent DHUK / CCB - operations are complete. + was registered with wc_Stm32_DhukRegister. Call it once transparent + DHUK / CCB operations are complete. \return none No return value. @@ -62,9 +63,9 @@ void wc_Stm32_DhukUnRegister(int devId); builds with WOLFSSL_DHUK and a DHUK-capable SAES. \return 0 Returned on success. - \return BAD_FUNC_ARG Returned if a required pointer is NULL, if inSz is not a - supported block size, if the iv is non-NULL with ivSz != 16, or (software-key - path) if the wrapping key length is not 16 or 32. + \return BAD_FUNC_ARG Returned if a required pointer is NULL, if inSz is + not a supported block size, if the iv is non-NULL with ivSz != 16, or + (software-key path) if the wrapping key length is not 16 or 32. \return <0 A negative error code may be returned on a hardware error. 
\param aes pointer to an initialized Aes; aes->devId selects the wrap key. @@ -86,10 +87,10 @@ int wc_Stm32_Aes_Wrap(struct Aes* aes, const byte* in, word32 inSz, byte* out, \brief This function brings up the STM32 CCB (Coupling and Chaining Bridge) peripheral and reports whether it is usable: it enables the CCB / PKA / SAES / RNG clocks, pulse-resets the engines, waits for BUSY to clear, and checks - for an operation error. Bare-metal only (WOLFSSL_STM32_BARE). The transparent - CCB sign path calls this internally, so most callers do not invoke it - directly. Available on STM32 builds with WOLFSSL_STM32_CCB on CCB silicon - (STM32U3 or STM32C5). + for an operation error. Bare-metal only (WOLFSSL_STM32_BARE). The + transparent CCB sign path calls this internally, so most callers do not + invoke it directly. Available on STM32 builds with WOLFSSL_STM32_CCB on + CCB silicon (STM32U3 or STM32C5). \return 0 Returned when the CCB is up and usable. \return WC_TIMEOUT_E Returned if BUSY does not clear within the timeout. @@ -115,7 +116,8 @@ int wc_Stm32_CcbInit(void); \return 0 Returned on success. \return NOT_COMPILED_IN Returned if curveId is an unsupported curve. - \return BAD_FUNC_ARG Returned if a required pointer is NULL or dLen is wrong. + \return BAD_FUNC_ARG Returned if a required pointer is NULL or dLen is + wrong. \return WC_TIMEOUT_E Returned if a hardware step times out. \param curveId the ECC curve id; currently ECC_SECP256R1. diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index 8d99481ca63..07bedf534cc 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -4167,8 +4167,9 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt(Aes* aes, const byte* inBlock, #ifdef WOLF_CRYPTO_CB /* Keep a raw (non-reversed) copy for crypto-callback offload, e.g. the * DHUK device reads the seed from devKey. 
Mirrors the generic - * wc_AesSetKey cryptocb path: only for a cryptocb-bound key, and reject - * an oversized key (matches the other devKey copy sites in this file). */ + * wc_AesSetKey cryptocb path: only for a cryptocb-bound key, and + * reject an oversized key (matches the other devKey copy sites in + * this file). */ if (aes->devId != INVALID_DEVID) { if (keylen > sizeof(aes->devKey)) { return BAD_FUNC_ARG; @@ -5747,7 +5748,8 @@ int wc_AesSetIV(Aes* aes, const byte* iv) * callback). CBC has no crypto-callback entry on the BARE path, so a * DHUK key -- devId == WC_DHUK_DEVID, where wc_AesSetKey stored the * derivation seed in aes->key -- would run with the seed as the AES - * key. Reject rather than silently produce a non-device-bound result. */ + * key. Reject rather than silently produce a non-device-bound + * result. */ if (aes->devId == WC_DHUK_DEVID) { return NOT_COMPILED_IN; } diff --git a/wolfcrypt/src/ecc.c b/wolfcrypt/src/ecc.c index 14c0c3e9494..421437b3480 100644 --- a/wolfcrypt/src/ecc.c +++ b/wolfcrypt/src/ecc.c @@ -288,9 +288,9 @@ ECC Curve Sizes: * verify (mode 0x26) correctly, so use the SW verify helper rather than the * HW-accelerator sigRS path. Note the helper's scalar multiplications still * run on the HW PKA generic-mul (stm32.c provides wc_ecc_mulmod_ex under - * !WC_STM32_PKA_VERIFY_ONLY) -- the C5 issue is specifically the verify-mode - * wrapper, not the point math, which is exercised and correct (the lead for - * a future real HW verify). */ + * !WC_STM32_PKA_VERIFY_ONLY) -- the C5 issue is specifically the + * verify-mode wrapper, not the point math, which is exercised and correct + * (the lead for a future real HW verify). 
*/ #undef HAVE_ECC_VERIFY_HELPER #define HAVE_ECC_VERIFY_HELPER #endif @@ -7032,56 +7032,20 @@ static int deterministic_sign_helper(const byte* in, word32 inlen, ecc_key* key) #endif /* WOLFSSL_ECDSA_DETERMINISTIC_K || WOLFSSL_ECDSA_DETERMINISTIC_K_VARIANT */ -/* WOLFSSL_STM32_PKA routes HW ECDSA sign/verify through the STM32 PKA - * (HAL_PKA_ECDSASign / Verify). Works under both the CubeMX-HAL path - * and the bare-metal direct-register path (WOLFSSL_STM32_BARE) -- the - * bare-metal driver implements the same HAL_PKA_ECDSA* surface. - * - * The non-FIPS input-validation checks (length range, all-zero digest - * rejection) live inside the SW body of wc_ecc_sign_hash_ex below. - * Since the STM32_PKA branch returns early without reaching them, - * mirror those checks here so HW + SW paths share the same input - * contract. Without this, an all-zero digest reaches the PKA IP and - * succeeds at the HW layer -- the wolfcrypt_test ECC sweep then fails - * at the post-call assertion that expected ECC_BAD_ARG_E for a zero - * digest. */ -/* The STM32H563 "light" PKA can verify but not sign (per ST: H563 - * verify-only, H573 full). WC_STM32_PKA_VERIFY_ONLY routes sign to the - * software path (the #elif branch below) while verify stays on the HW PKA. 
*/ -#if defined(WOLFSSL_STM32_PKA) && !defined(WC_STM32_PKA_VERIFY_ONLY) -int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng, - ecc_key* key, mp_int *r, mp_int *s) -{ -#ifndef WC_ALLOW_ECC_ZERO_HASH - byte hashIsZero = 0; - word32 zIdx; -#endif - - if (in == NULL || r == NULL || s == NULL || key == NULL || rng == NULL) { - return ECC_BAD_ARG_E; - } - if ((inlen > WC_MAX_DIGEST_SIZE) || (inlen < WC_MIN_DIGEST_SIZE)) { - return BAD_LENGTH_E; - } -#ifndef WC_ALLOW_ECC_ZERO_HASH - /* reject all 0's hash */ - for (zIdx = 0; zIdx < inlen; zIdx++) - hashIsZero |= in[zIdx]; - if (hashIsZero == 0) - return ECC_BAD_ARG_E; -#endif - - return stm32_ecc_sign_hash_ex(in, inlen, rng, key, r, s); -} - -#if defined(WOLFSSL_DHUK) && defined(WOLFSSL_STM32_BARE) && \ - defined(WC_STM32_HAS_DHUK) +/* Gated to match the ecc.h prototype: user-visible macros only (WOLFSSL_DHUK + + * build-flavor), NOT the port-internal WC_STM32_HAS_DHUK, so the WOLFSSL_API + * prototype in ecc.h is in scope at this definition. The DHUK key fields it + * sets are themselves WOLFSSL_DHUK-gated in ecc.h. */ +#if defined(WOLFSSL_DHUK) && \ + (defined(WOLFSSL_STM32_BARE) || defined(WOLFSSL_STM32_CUBEMX)) /* Import a hardware-wrapped ECC private scalar + its derivation seed onto the * ecc_key for the DHUK crypto-callback sign path. The scalar is AES-encrypted * (offline or on-chip) with the device key that the SAES derives from the seed; * at sign time it is decrypted into a short-lived buffer. The devId is NOT set * here -- enable the device by setting devId at init - * (wc_ecc_init_ex(&key, heap, WC_DHUK_DEVID)). See ecc.h for the contract. */ + * (wc_ecc_init_ex(&key, heap, WC_DHUK_DEVID)). See ecc.h for the contract. + * This is a pure setter (no STM32 PKA dependency), so it is available on both + * the bare-metal and CubeMX/HAL DHUK build paths. 
*/ int wc_ecc_import_wrapped_private(ecc_key* key, const byte* seed, word32 seedSz, const byte* wrapped, word32 wrappedLen, word32 plainLen) @@ -7122,7 +7086,49 @@ int wc_ecc_import_wrapped_private(ecc_key* key, const byte* seed, word32 seedSz, #endif return 0; } -#endif /* WOLFSSL_DHUK && WOLFSSL_STM32_BARE && WC_STM32_HAS_DHUK */ +#endif /* WOLFSSL_DHUK && (BARE || CUBEMX) */ + +/* WOLFSSL_STM32_PKA routes HW ECDSA sign/verify through the STM32 PKA + * (HAL_PKA_ECDSASign / Verify). Works under both the CubeMX-HAL path + * and the bare-metal direct-register path (WOLFSSL_STM32_BARE) -- the + * bare-metal driver implements the same HAL_PKA_ECDSA* surface. + * + * The non-FIPS input-validation checks (length range, all-zero digest + * rejection) live inside the SW body of wc_ecc_sign_hash_ex below. + * Since the STM32_PKA branch returns early without reaching them, + * mirror those checks here so HW + SW paths share the same input + * contract. Without this, an all-zero digest reaches the PKA IP and + * succeeds at the HW layer -- the wolfcrypt_test ECC sweep then fails + * at the post-call assertion that expected ECC_BAD_ARG_E for a zero + * digest. */ +/* The STM32H563 "light" PKA can verify but not sign (per ST: H563 + * verify-only, H573 full). WC_STM32_PKA_VERIFY_ONLY routes sign to the + * software path (the #elif branch below) while verify stays on the HW PKA. 
*/ +#if defined(WOLFSSL_STM32_PKA) && !defined(WC_STM32_PKA_VERIFY_ONLY) +int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng, + ecc_key* key, mp_int *r, mp_int *s) +{ +#ifndef WC_ALLOW_ECC_ZERO_HASH + byte hashIsZero = 0; + word32 zIdx; +#endif + + if (in == NULL || r == NULL || s == NULL || key == NULL || rng == NULL) { + return ECC_BAD_ARG_E; + } + if ((inlen > WC_MAX_DIGEST_SIZE) || (inlen < WC_MIN_DIGEST_SIZE)) { + return BAD_LENGTH_E; + } +#ifndef WC_ALLOW_ECC_ZERO_HASH + /* reject all 0's hash */ + for (zIdx = 0; zIdx < inlen; zIdx++) + hashIsZero |= in[zIdx]; + if (hashIsZero == 0) + return ECC_BAD_ARG_E; +#endif + + return stm32_ecc_sign_hash_ex(in, inlen, rng, key, r, s); +} #elif !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_ATECC608A) && \ !defined(WOLFSSL_MICROCHIP_TA100) && \ @@ -8264,13 +8270,25 @@ int wc_ecc_dev_make_key(WC_RNG* rng, int keysize, ecc_key* key, int curve_id) } } if (ret == 0) { - ret = wc_ecc_import_wrapped_private_ex(key, curve_id, wrapped, wrappedSz, - iv, (word32)sizeof(iv), tag, + ret = wc_ecc_import_wrapped_private_ex(key, curve_id, wrapped, + wrappedSz, iv, (word32)sizeof(iv), tag, (word32)sizeof(tag), pub, (word32)(2 * modSz)); } - ForceZero(d, MAX_ECC_BYTES); + /* Clear all sensitive material: the plaintext scalar (d) and the wrapped + * private-scalar blob (wrapped). In the small-stack path d/pub/wrapped all + * live in scratch, so clear it all; iv/tag are always on the stack. */ +#ifdef WOLFSSL_SMALL_STACK + if (scratch != NULL) { + ForceZero(scratch, (3 * MAX_ECC_BYTES) + 96); + } +#else + ForceZero(d, sizeof(d)); + ForceZero(wrapped, sizeof(wrapped)); +#endif + ForceZero(iv, sizeof(iv)); + ForceZero(tag, sizeof(tag)); if (tmpInit) { wc_ecc_free(tmp); } diff --git a/wolfcrypt/src/port/st/README.md b/wolfcrypt/src/port/st/README.md index d83c1001a37..349fa3cf756 100644 --- a/wolfcrypt/src/port/st/README.md +++ b/wolfcrypt/src/port/st/README.md @@ -91,6 +91,17 @@ Include `` before any other wolfSSL headers. 
If bu See our [benchmarks](https://www.wolfssl.com/docs/benchmarks/) page for canonical numbers. For per-silicon BARE-vs-CubeMX comparisons across the NUCLEO board matrix, see the bench tables in the `STM32_Bare_Test/README.md` of the examples repo (PR #13). +BARE vs CubeMX on the SAES+DHUK boards (wolfCrypt benchmark, symmetric in MiB/s, ECDSA P-256 sign in ops/sec; `bare / cubemx`): + +| Board (NUCLEO) | SYSCLK | AES-128-CBC | AES-128-GCM | SHA-256 | ECDSA P-256 sign | +|-----------------|---------|-------------|-------------|-------------|------------------| +| U385RG-Q (U3) | 96 MHz | 9.46 / 4.91 | 0.50 / 4.41 | 7.04 / 11.66| 33.83 / 5.48 | +| U545RE-Q (U5) | 96 MHz | 11.18 / 5.40| 0.73 / 5.02 | 12.52 / 20.21| 34.16 / 34.45 | +| U585 (U5) | 96 MHz | 11.55 / 5.61| 0.73 / 5.22 | 12.76 / 21.97| 34.16 / 34.45 | +| C5A3ZG (C5) | 144 MHz | 16.56 / 16.58| 1.05 / 1.05| 2.00 / 1.99 | 51.03 / 51.03 | + +Takeaways: (1) On STM32C5 the CubeMX build keeps wolfcrypt on the register crypto path (C5's new-generation HAL has no classic crypto driver), so its bare and CubeMX columns match to within measurement noise. (2) On the U5 family the CubeMX HAL-PKA path reaches HW parity for ECDSA sign (34.45 vs 34.16 bare). (3) On U3 the CubeMX ECDSA is ~6x slower because the HAL PKA include-chain does not yet cover U3 and it falls back to SW math (the bare path uses HW V2 PKA). (4) AES-128-GCM is ~7x faster under HAL on the U5 family (0.73 bare vs 5.02-5.22 CubeMX) because the bare U5 path does SW GHASH + HW ECB while the HAL drives full HW GCM; the U3 row shows the same pattern (~9x, 0.50 bare vs 4.41 CubeMX). (5) Because DHUK and CCB run the same SAES register sequence on both build flavors, their throughput tracks the bare column. Captured on real hardware; raw logs in the examples repo `STM32_Bare_Test/bench_logs/`. + ## STM32 RNG @@ -133,11 +144,11 @@ A DHUK-protected key is driven by a per-key 256-bit seed.
The SAES derives the d #define WOLF_CRYPTO_CB /* required -- DHUK routes through crypto callbacks */ ``` -`WC_STM32_HAS_DHUK` is auto-defined for the SAES+DHUK families when `WOLFSSL_DHUK` is set; other families compile out the DHUK code. `WOLFSSL_STM32_BARE` selects the bare-metal SAES backend. +`WC_STM32_HAS_DHUK` is auto-defined for the SAES+DHUK families when `WOLFSSL_DHUK` is set; other families compile out the DHUK code. DHUK works on both the bare-metal (`WOLFSSL_STM32_BARE`) and CubeMX/HAL (`WOLFSSL_STM32_CUBEMX`) build paths. Both drive the SAES key-derivation through the same direct-register sequence, so the derived device key is bit-identical across build flavors (validated on STM32U385, STM32U545 and STM32C5A3: the GMAC tag and AES-ECB ciphertext match between the bare and CubeMX builds for the same seed). On the classic-HAL families (U3/U5/H5) the CubeMX path uses the HAL for clock/UART/RNG bring-up and, for seed-based ECDSA sign, the HAL PKA. STM32C5 ships ST's new-generation HAL (no classic crypto driver APIs), so there the CubeMX build keeps wolfcrypt on the register path entirely and the HAL is used only for board bring-up -- the DHUK result is still bit-identical to the C5 bare build. Because both flavors execute the same SAES register sequence, transparent DHUK AES/GMAC and CCB-sign throughput is the same on the bare and CubeMX builds (on STM32C5A3 the bare and CubeMX benchmark columns are equal to within noise). ### Migration from WOLFSSL_STM32U5_DHUK -`WOLFSSL_STM32U5_DHUK` is now an alias for this `WOLFSSL_DHUK` crypto-callback model and requires `WOLF_CRYPTO_CB` (a `#error` fires otherwise). The previous experimental inline path -- wrapped-key AES handled directly inside `wc_AesEncrypt` / `wc_AesDecrypt` / `wc_AesCbcEncrypt` / `wc_AesCbcDecrypt`, plus `wc_Stm32_Aes_SetDHUK_IV()`, `wc_Stm32_Aes_UnWrap()`, and the `Aes.dhukIV` / `dhukIVLen` members -- has been removed (fail-loud: code referencing those symbols no longer compiles). 
Migrate to the devId model shown below: register the device, init with `WC_DHUK_DEVID`, and use the normal `wc_Aes*` / `wc_ecc_*` APIs. Note that transparent DHUK AES/GMAC is bare-only (`WOLFSSL_STM32_BARE`); on the CubeMX/HAL path the crypto callback covers CCB ECDSA sign/keygen only. +`WOLFSSL_STM32U5_DHUK` is now an alias for this `WOLFSSL_DHUK` crypto-callback model and requires `WOLF_CRYPTO_CB` (a `#error` fires otherwise). The previous experimental inline path -- wrapped-key AES handled directly inside `wc_AesEncrypt` / `wc_AesDecrypt` / `wc_AesCbcEncrypt` / `wc_AesCbcDecrypt`, plus `wc_Stm32_Aes_SetDHUK_IV()`, `wc_Stm32_Aes_UnWrap()`, and the `Aes.dhukIV` / `dhukIVLen` members -- has been removed (fail-loud: code referencing those symbols no longer compiles). Migrate to the devId model shown below: register the device, init with `WC_DHUK_DEVID`, and use the normal `wc_Aes*` / `wc_ecc_*` APIs. Transparent DHUK AES/GMAC and seed-based ECDSA sign are available on both the bare-metal (`WOLFSSL_STM32_BARE`) and CubeMX/HAL (`WOLFSSL_STM32_CUBEMX`) build paths; on the CubeMX path the crypto callback additionally provisions and signs with a CCB-protected key (`wc_ecc_make_key` keygen + sign) where the CCB peripheral is present. ### API @@ -225,7 +236,7 @@ Worked example: `STM32_Bare_Test/src/main_ccb.c` (examples repo, PR #13) runs th - Validated on STM32U385 (NUCLEO-U385RG-Q, TZEN=0), P-256, on both the bare-metal and CubeMX/HAL build paths: `wc_ecc_make_key` -> `wc_ecc_sign_hash` -> `wc_ecc_verify_hash` round-trips, with the private scalar never present in software. - Also validated on STM32C5 (NUCLEO-C5A3ZG, TZEN=0), P-256, bare-metal: the same `wc_ecc_make_key` -> `wc_ecc_sign_hash` -> `wc_ecc_verify_hash` flow plus a persisted-blob re-import (`wc_ecc_import_wrapped_private_ex`) round-trip, all on the CCB hardware. 
On STM32C5 the blob-create step is a combined create-and-sign: the C5 OPSTEP machine only advances through the GCM-final phase when the random k is drawn and the PKA sign is started during creation (the r,s are a by-product and discarded). That extra sequence is gated by `WOLFSSL_STM32C5` in the bare driver; the U3 OPSTEP machine does not require it. -- `Stm32Ccb_Init()` pulse-resets the PKA / SAES / RNG before each operation, so the first CCB op is robust even when prior standalone crypto (RNG seeding, ECC keygen) left an engine in a state that would otherwise stall the CCB's chained SAES GCM step. The family-specific reset register name is abstracted (`WC_STM32_CCB_RSTR`). +- `wc_Stm32_CcbInit()` pulse-resets the PKA / SAES / RNG before each operation, so the first CCB op is robust even when prior standalone crypto (RNG seeding, ECC keygen) left an engine in a state that would otherwise stall the CCB's chained SAES GCM step. The family-specific reset register name is abstracted (`WC_STM32_CCB_RSTR`). - CCB requires the U3 / C5 at its full clock; the reference clock-tree bring-up is in the bare example's `boards/u3/hw_init.c` (96 MHz) and `boards/c5a3/hw_init.c`. @@ -241,7 +252,8 @@ The caller is responsible for: 1. Clock-tree bring-up (HSI/HSE, PLL, voltage scaling, flash latency). 2. UART / VCP bring-up for stdout. -3. Peripheral clock-enable for the IP blocks you use (RNG, CRYP/SAES, HASH, PKA). + +For `WOLFSSL_STM32_BARE` builds wolfcrypt enables the per-IP peripheral clocks it needs (RNG, CRYP/SAES, HASH, PKA) internally via the `WC_STM32_*_CLK_ENABLE` macros in `stm32.h`, so the caller does not have to enable those AHB/APB peripheral clocks itself. In return wolfcrypt drives the IP-block registers directly. Family-specific arms in `wolfssl/wolfcrypt/port/st/stm32.h` handle the per-chip register-name differences (e.g. `RCC->AHB2ENR` vs `RCC->AHB2ENR1`, `D2CCIP2R` vs `CDCCIP2R`). 
diff --git a/wolfcrypt/src/port/st/stm32.c b/wolfcrypt/src/port/st/stm32.c index e22d60624de..f848f433183 100644 --- a/wolfcrypt/src/port/st/stm32.c +++ b/wolfcrypt/src/port/st/stm32.c @@ -2999,7 +2999,8 @@ static int Stm32Dhuk_Gmac(const byte* seed, const byte* iv, word32 ivSz, goto exit; } - /* Derive the DHUK-bound working key into SAES KEYR from the caller's seed. */ + /* Derive the DHUK-bound working key into SAES KEYR from the caller's + * seed. */ ret = Stm32SaesDeriveKeyFromSeed(seed, 32u); if (ret != 0) { goto exit; @@ -3584,8 +3585,12 @@ static int Stm32Dhuk_Cipher(struct wc_CryptoInfo* info) } #endif /* !NO_AES */ -#if defined(HAVE_ECC) && defined(HAVE_ECC_SIGN) && defined(WOLFSSL_STM32_PKA) -/* Route an ECDSA sign request to the SAES/PKA backend. */ +#if defined(HAVE_ECC) && defined(HAVE_ECC_SIGN) && \ + (defined(WOLFSSL_STM32_PKA) || defined(WOLFSSL_STM32_CCB)) +/* Route an ECDSA sign request to the SAES/PKA backend. The CCB-protected sign + * drives the PKA through the CCB peripheral and needs no PKA HAL/shim, so it is + * available whenever WOLFSSL_STM32_CCB is set; the seed-DHUK (non-CCB) sign + * needs the standalone PKA (WOLFSSL_STM32_PKA). */ static int Stm32Dhuk_PkSign(struct wc_CryptoInfo* info) { ecc_key* key = info->pk.eccsign.key; @@ -3595,7 +3600,8 @@ static int Stm32Dhuk_PkSign(struct wc_CryptoInfo* info) } #ifdef WOLFSSL_STM32_CCB /* CCB-protected key: the scalar is unwrapped SAES->PKA in hardware and the - * signature returned as raw (r,s); encode it as the DER ECDSA-Sig output. */ + * signature returned as raw (r,s); encode it as the DER ECDSA-Sig + * output. */ if (key->dhuk_is_ccb) { byte r[MAX_ECC_BYTES]; byte s[MAX_ECC_BYTES]; @@ -3620,13 +3626,20 @@ static int Stm32Dhuk_PkSign(struct wc_CryptoInfo* info) if (key->dhuk_seed_sz != 32u) { return CRYPTOCB_UNAVAILABLE; } +#ifdef WOLFSSL_STM32_PKA /* Stm32Dhuk_Sign reads key->dhuk_seed directly under the HW mutex. 
*/ return Stm32Dhuk_Sign(NULL, key, info->pk.eccsign.in, info->pk.eccsign.inlen, info->pk.eccsign.out, info->pk.eccsign.outlen, info->pk.eccsign.rng); +#else + /* Seed-DHUK sign needs the standalone PKA; without it report unavailable + * (a CCB-only build only HW-signs CCB-provisioned keys, handled above). */ + return CRYPTOCB_UNAVAILABLE; +#endif } -#endif /* HAVE_ECC && HAVE_ECC_SIGN && WOLFSSL_STM32_PKA */ +#endif /* HAVE_ECC && HAVE_ECC_SIGN && + * (WOLFSSL_STM32_PKA || WOLFSSL_STM32_CCB) */ /* The crypto-callback device entry point (registered by wc_Stm32_DhukRegister). * Returns CRYPTOCB_UNAVAILABLE for anything it does not handle so the caller @@ -3644,20 +3657,25 @@ static int Stm32_CryptoDevCb(int devId, struct wc_CryptoInfo* info, void* ctx) case WC_ALGO_TYPE_CIPHER: return Stm32Dhuk_Cipher(info); #endif -#if defined(HAVE_ECC) && defined(HAVE_ECC_SIGN) && defined(WOLFSSL_STM32_PKA) +#if defined(HAVE_ECC) && defined(HAVE_ECC_SIGN) && \ + (defined(WOLFSSL_STM32_PKA) || defined(WOLFSSL_STM32_CCB)) case WC_ALGO_TYPE_PK: - if (info->pk.type == WC_PK_TYPE_ECDSA_SIGN) { - return Stm32Dhuk_PkSign(info); - } #ifdef WOLFSSL_STM32_CCB /* Transparent provisioning: wc_ecc_make_key() on a WC_DHUK_DEVID - * key binds a fresh CCB-protected blob to it (no CCB-specific API). */ + * key binds a fresh CCB-protected blob to it (no CCB-specific + * API). CCB keygen/sign drive the PKA via the CCB peripheral and + * need no WOLFSSL_STM32_PKA -- this matches the CubeMX callback, so + * a WOLFSSL_STM32_CCB build without WOLFSSL_STM32_PKA does NOT + * silently fall back to a plaintext software key. 
*/ if (info->pk.type == WC_PK_TYPE_EC_KEYGEN) { return wc_ecc_dev_make_key(info->pk.eckg.rng, info->pk.eckg.size, info->pk.eckg.key, info->pk.eckg.curveId); } #endif + if (info->pk.type == WC_PK_TYPE_ECDSA_SIGN) { + return Stm32Dhuk_PkSign(info); + } return CRYPTOCB_UNAVAILABLE; #endif default: @@ -3806,7 +3824,7 @@ int wc_Stm32_CcbInit(void) #define WC_CCB_OP_CREATE 0x000000C0u /* CCOP: ECDSA CPU blob create */ #define WC_CCB_OP_SCALAR_USE 0x00000081u /* CCOP: scalar mul (pubkey) */ #define WC_CCB_PKA_SIGN_MODE 0x24u /* PKA_CR.MODE for ECDSA sign */ -#define WC_CCB_PKA_MUL_MODE 0x20u /* PKA_CR.MODE for ECC scalar mul */ +#define WC_CCB_PKA_MUL_MODE 0x20u /* PKA_CR.MODE for ECC mul */ #define WC_CCB_MAGIC 0x0CCBu /* SAES->PKA chaining magic */ #define WC_CCB_FAKE 0x0001u /* placeholder fed to RNG->PKA */ #define WC_CCB_PKA_OK 0x0000D60Du /* PKA_ECDSA_SIGN_OUT_ERROR ok */ @@ -3816,27 +3834,38 @@ int wc_Stm32_CcbInit(void) * opsz is not a multiple of 4 = 8. (Single-curve P-256 today.) */ #define WC_CCB_P256_OPSZ 10u #define WC_CCB_P256_CIPSZ 8u -/* SAES GCM final-phase header length word for the blob (bit-length encoding). */ +/* SAES GCM final-phase header length word for the blob (bit-length + * encoding). */ #define WC_CCB_GCM_HDR_LEN(opsz) (((((opsz) * 32u) * 6u) + (3u * 64u)) * 2u) /* NIST P-256 parameters (big-endian, 32 bytes). 
*/ static const byte wc_ccb_p256_aAbs[32] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x03}; static const byte wc_ccb_p256_b[32] = { - 0x5a,0xc6,0x35,0xd8,0xaa,0x3a,0x93,0xe7,0xb3,0xeb,0xbd,0x55,0x76,0x98,0x86,0xbc, - 0x65,0x1d,0x06,0xb0,0xcc,0x53,0xb0,0xf6,0x3b,0xce,0x3c,0x3e,0x27,0xd2,0x60,0x4b}; + 0x5a,0xc6,0x35,0xd8,0xaa,0x3a,0x93,0xe7, + 0xb3,0xeb,0xbd,0x55,0x76,0x98,0x86,0xbc, + 0x65,0x1d,0x06,0xb0,0xcc,0x53,0xb0,0xf6, + 0x3b,0xce,0x3c,0x3e,0x27,0xd2,0x60,0x4b}; static const byte wc_ccb_p256_p[32] = { - 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff}; + 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x01, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff}; static const byte wc_ccb_p256_n[32] = { - 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xbc,0xe6,0xfa,0xad,0xa7,0x17,0x9e,0x84,0xf3,0xb9,0xca,0xc2,0xfc,0x63,0x25,0x51}; + 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xbc,0xe6,0xfa,0xad,0xa7,0x17,0x9e,0x84, + 0xf3,0xb9,0xca,0xc2,0xfc,0x63,0x25,0x51}; static const byte wc_ccb_p256_Gx[32] = { - 0x6b,0x17,0xd1,0xf2,0xe1,0x2c,0x42,0x47,0xf8,0xbc,0xe6,0xe5,0x63,0xa4,0x40,0xf2, - 0x77,0x03,0x7d,0x81,0x2d,0xeb,0x33,0xa0,0xf4,0xa1,0x39,0x45,0xd8,0x98,0xc2,0x96}; + 0x6b,0x17,0xd1,0xf2,0xe1,0x2c,0x42,0x47, + 0xf8,0xbc,0xe6,0xe5,0x63,0xa4,0x40,0xf2, + 0x77,0x03,0x7d,0x81,0x2d,0xeb,0x33,0xa0, + 0xf4,0xa1,0x39,0x45,0xd8,0x98,0xc2,0x96}; static const byte wc_ccb_p256_Gy[32] = { - 0x4f,0xe3,0x42,0xe2,0xfe,0x1a,0x7f,0x9b,0x8e,0xe7,0xeb,0x4a,0x7c,0x0f,0x9e,0x16, - 0x2b,0xce,0x33,0x57,0x6b,0x31,0x5e,0xce,0xcb,0xb6,0x40,0x68,0x37,0xbf,0x51,0xf5}; + 0x4f,0xe3,0x42,0xe2,0xfe,0x1a,0x7f,0x9b, + 0x8e,0xe7,0xeb,0x4a,0x7c,0x0f,0x9e,0x16, + 0x2b,0xce,0x33,0x57,0x6b,0x31,0x5e,0xce, + 
0xcb,0xb6,0x40,0x68,0x37,0xbf,0x51,0xf5}; /* Wait until CCB_SR.OPSTEP reaches the given step value. */ static int Stm32Ccb_WaitOpStep(word32 step) @@ -3971,9 +4000,10 @@ static int Stm32Ccb_SetParam(word32 slot, const byte* src, word32 sizeBytes) for (off = 0u; off < (operand - 2u); off += 2u) { p = &src[sizeBytes - ((off * 4u) + 1u)]; WC_CCB_PKA_RAMW[slot + off] = (word32)p[0] | ((word32)p[-1] << 8) | - ((word32)p[-2] << 16) | ((word32)p[-3] << 24); - WC_CCB_PKA_RAMW[slot + off + 1u] = (word32)p[-4] | ((word32)p[-5] << 8) | - ((word32)p[-6] << 16) | ((word32)p[-7] << 24); + ((word32)p[-2] << 16) | ((word32)p[-3] << 24); + WC_CCB_PKA_RAMW[slot + off + 1u] = (word32)p[-4] | + ((word32)p[-5] << 8) | ((word32)p[-6] << 16) | + ((word32)p[-7] << 24); ret = Stm32Ccb_SaesWaitCcf(); if (ret != 0) { return ret; @@ -4033,7 +4063,8 @@ static int Stm32Ccb_LoadCurve(void) wc_ccb_p256_Gy, 32u); } -/* Wait until a SAES_SR flag reaches the wanted state (used for BUSY/KEYVALID). */ +/* Wait until a SAES_SR flag reaches the wanted state (used for + * BUSY/KEYVALID). */ static int Stm32Ccb_SaesWaitSr(word32 flag, int wantSet) { word32 t = 0; @@ -4071,8 +4102,8 @@ static int Stm32Ccb_OpBegin(word32 ccop, word32 pkaMode) /* Load the DHUK blob key into SAES (KEYSEL=HW, 256-bit, GCM) and wait for the * CCB to advance. isUse=1 selects decrypt (MODE_1) for blob use (sign / pubkey) - * and the CCB reaches OPSTEP 0x12; isUse=0 selects encrypt for blob creation and - * the CCB reaches OPSTEP 0x02. */ + * and the CCB reaches OPSTEP 0x12; isUse=0 selects encrypt for blob creation + * and the CCB reaches OPSTEP 0x02. */ static int Stm32Ccb_LoadBlobKey(int isUse) { word32 cr = AES_CR_KEYSEL_0 | AES_CR_KEYSIZE | STM32_AES_CHMOD_GCM; @@ -4167,6 +4198,33 @@ static int Stm32Ccb_GcmFinalTagCheck(word32 opsz, word32 cipsz) return 0; } +/* Feed the wrapped scalar to the SAES; the CCB substitutes the decrypted key + * into PKA RAM where WC_CCB_MAGIC is written (SAES->PKA chaining). 
Shared by + * the scalar-mul (pubkey) and ECDSA-sign paths; only ramSlot differs. */ +static int Stm32Ccb_FeedWrappedScalar(word32 ramSlot, const word32* wrapw, + word32 cipsz) +{ + word32 off; + word32 block; + word32 i; + int ret; + + block = 0u; + for (off = 0u; off < cipsz; off++) { + SAES->DINR = wrapw[cipsz - 1u - off]; + if ((off % 4u) == 3u) { + if ((ret = Stm32Ccb_SaesWaitCcf()) != 0) { + return ret; + } + for (i = 0u; i < 4u; i++) { + WC_CCB_PKA_RAMW[ramSlot + block + i] = WC_CCB_MAGIC; + } + block += 4u; + } + } + return 0; +} + /* Bare CCB public-key computation: scalar mult d*G via the blob. Mirrors the * blob-use sign path, but loads the unwrapped scalar into the K slot (no random * k) and reads the resulting point into pubX/pubY. */ @@ -4178,9 +4236,6 @@ static int Stm32Ccb_ComputePub(const byte* iv, const byte* tag, word32 wrapw[8]; word32 opsz; word32 cipsz; - word32 off; - word32 block; - word32 i; int ret; XMEMCPY(ivw, iv, sizeof(ivw)); @@ -4201,10 +4256,11 @@ static int Stm32Ccb_ComputePub(const byte* iv, const byte* tag, SAES->CR |= AES_CR_EN; if ((ret = Stm32Ccb_SaesWaitCcf()) != 0) { goto done; } Stm32Ccb_LoadRefTag(tagw); - /* SAES GCM/chaining-phase field: STM32C5 names it AES_CR_CPHASE, U3 names it - * AES_CR_GCMPH -- same bit positions/values. WC_STM32_AES_CR_PHASE abstracts - * the name (port/st/stm32.h). This was the one genuinely-divergent part of - * the OPSTEP driver; the rest is already family-neutral (WC_STM32_CCB_*). */ + /* SAES GCM/chaining-phase field: STM32C5 names it AES_CR_CPHASE, U3 + * names it AES_CR_GCMPH -- same bit positions/values. + * WC_STM32_AES_CR_PHASE abstracts the name (port/st/stm32.h). This was + * the one genuinely-divergent part of the OPSTEP driver; the rest is + * already family-neutral (WC_STM32_CCB_*). 
*/ SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | WC_STM32_AES_CR_PHASE_0 | AES_CR_EN; if ((ret = Stm32Ccb_WaitOpStep(0x13u)) != 0) { goto done; } @@ -4213,17 +4269,8 @@ static int Stm32Ccb_ComputePub(const byte* iv, const byte* tag, SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | WC_STM32_AES_CR_PHASE_1; if ((ret = Stm32Ccb_WaitOpStep(0x14u)) != 0) { goto done; } - block = 0u; - for (off = 0u; off < cipsz; off++) { - SAES->DINR = wrapw[cipsz - 1u - off]; - if ((off % 4u) == 3u) { - if ((ret = Stm32Ccb_SaesWaitCcf()) != 0) { goto done; } - for (i = 0u; i < 4u; i++) { - WC_CCB_PKA_RAMW[PKA_ECC_SCALAR_MUL_IN_K + block + i] = WC_CCB_MAGIC; - } - block += 4u; - } - } + if ((ret = Stm32Ccb_FeedWrappedScalar(PKA_ECC_SCALAR_MUL_IN_K, wrapw, + cipsz)) != 0) { goto done; } WC_CCB_PKA_RAMW[PKA_ECC_SCALAR_MUL_IN_K + cipsz] = 0u; WC_CCB_PKA_RAMW[PKA_ECC_SCALAR_MUL_IN_K + cipsz + 1u] = 0u; if ((ret = Stm32Ccb_WaitOpStep(0x17u)) != 0) { goto done; } @@ -4236,8 +4283,10 @@ static int Stm32Ccb_ComputePub(const byte* iv, const byte* tag, if ((ret = Stm32Ccb_PkaWaitFlag(PKA_SR_PROCENDF)) != 0) { goto done; } if ((ret = Stm32Ccb_WaitOpStep(0x1Au)) != 0) { goto done; } - wc_stm32_pka_read_be(pubX, &WC_CCB_PKA_RAMW[PKA_ECC_SCALAR_MUL_OUT_RESULT_X], 32u); - wc_stm32_pka_read_be(pubY, &WC_CCB_PKA_RAMW[PKA_ECC_SCALAR_MUL_OUT_RESULT_Y], 32u); + wc_stm32_pka_read_be(pubX, + &WC_CCB_PKA_RAMW[PKA_ECC_SCALAR_MUL_OUT_RESULT_X], 32u); + wc_stm32_pka_read_be(pubY, + &WC_CCB_PKA_RAMW[PKA_ECC_SCALAR_MUL_OUT_RESULT_Y], 32u); ret = 0; done: @@ -4270,7 +4319,8 @@ int wc_Stm32_Ccb_EccMakeBlob(int curveId, const byte* d, word32 dLen, return NOT_COMPILED_IN; } /* Require pubX/pubY non-NULL to match the CubeMX/HAL implementation's - * contract (one public API, one NULL-handling rule across build flavors). */ + * contract (one public API, one NULL-handling rule across build + * flavors). 
*/ if (d == NULL || dLen != 32u || iv == NULL || tag == NULL || wrapped == NULL || wrappedSz == NULL || pubX == NULL || pubY == NULL) { return BAD_FUNC_ARG; @@ -4282,7 +4332,8 @@ int wc_Stm32_Ccb_EccMakeBlob(int curveId, const byte* d, word32 dLen, if (ret != 0) { return ret; } - /* CCOP = ECDSA CPU blob creation; load the DHUK blob key in encrypt mode. */ + /* CCOP = ECDSA CPU blob creation; load the DHUK blob key in encrypt + * mode. */ ret = Stm32Ccb_OpBegin(WC_CCB_OP_CREATE, WC_CCB_PKA_SIGN_MODE); if (ret != 0) { goto done; } if ((ret = Stm32Ccb_LoadBlobKey(0 /* create */)) != 0) { goto done; } @@ -4304,7 +4355,8 @@ int wc_Stm32_Ccb_EccMakeBlob(int curveId, const byte* d, word32 dLen, ivw[0] = SAES->IVR0; SAES->CR |= AES_CR_EN; if ((ret = Stm32Ccb_SaesWaitCcf()) != 0) { goto done; } - SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | WC_STM32_AES_CR_PHASE_0 | AES_CR_EN; + SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | + WC_STM32_AES_CR_PHASE_0 | AES_CR_EN; if ((ret = Stm32Ccb_WaitOpStep(0x03u)) != 0) { goto done; } /* Curve params (coupled), then GCM payload phase. */ @@ -4312,7 +4364,8 @@ int wc_Stm32_Ccb_EccMakeBlob(int curveId, const byte* d, word32 dLen, SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | WC_STM32_AES_CR_PHASE_1; if ((ret = Stm32Ccb_WaitOpStep(0x04u)) != 0) { goto done; } - /* CPU writes the clear scalar d into PKA RAM (BE->LE words from the end). */ + /* CPU writes the clear scalar d into PKA RAM (BE->LE words from the + * end). */ PKA->CLRFR = PKA_CLRFR_CMFC; for (off = 0u; off < (opsz - 2u); off += 2u) { Stm32Ccb_Wr64(PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D + off, @@ -4348,7 +4401,8 @@ int wc_Stm32_Ccb_EccMakeBlob(int curveId, const byte* d, word32 dLen, * PKA sign is started. Draw k, run the GCM final phase, read the tag, then * START the PKA; the resulting r,s are a creation by-product and discarded * (the blob is still {iv, tag, wrapped}). Mirrors the C5 HAL - * CCB_ECDSA_SignBlobCreation. The U3 OPSTEP machine does not require this. 
*/ + * CCB_ECDSA_SignBlobCreation. The U3 OPSTEP machine does not require + * this. */ if ((ret = Stm32Ccb_WaitOpStep(0x09u)) != 0) { goto done; } for (off = 0u; off < (opsz - 2u); off++) { if ((ret = Stm32Ccb_RngWaitDrdy()) != 0) { goto done; } @@ -4359,7 +4413,8 @@ int wc_Stm32_Ccb_EccMakeBlob(int curveId, const byte* d, word32 dLen, WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_IN_K + (opsz - 2u) + 1u] = 0u; if ((ret = Stm32Ccb_PkaWaitFlag(PKA_SR_RNGOKF)) != 0) { goto done; } if ((ret = Stm32Ccb_SaesWaitBusy()) != 0) { goto done; } - SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | WC_STM32_AES_CR_PHASE_0 | WC_STM32_AES_CR_PHASE_1; + SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | + WC_STM32_AES_CR_PHASE_0 | WC_STM32_AES_CR_PHASE_1; PKA->CLRFR = PKA_CLRFR_CMFC; SAES->DINR = 0u; SAES->DINR = WC_CCB_GCM_HDR_LEN(opsz); @@ -4377,8 +4432,10 @@ int wc_Stm32_Ccb_EccMakeBlob(int curveId, const byte* d, word32 dLen, goto done; } #else - /* GCM final phase: feed the length block and read the authentication tag. */ - SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | WC_STM32_AES_CR_PHASE_0 | WC_STM32_AES_CR_PHASE_1; + /* GCM final phase: feed the length block and read the authentication + * tag. 
*/ + SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | + WC_STM32_AES_CR_PHASE_0 | WC_STM32_AES_CR_PHASE_1; if ((ret = Stm32Ccb_WaitOpStep(0x0Au)) != 0) { goto done; } PKA->CLRFR = PKA_CLRFR_CMFC; SAES->DINR = 0u; @@ -4421,17 +4478,27 @@ int wc_Stm32_Ccb_EccSign(int curveId, const byte* iv, const byte* tag, word32 opsz; word32 cipsz; word32 off; - word32 block; - word32 i; + byte hb[32]; int ret; if (curveId != ECC_SECP256R1) { return NOT_COMPILED_IN; } if (iv == NULL || tag == NULL || wrapped == NULL || wrappedSz != 32u || - hash == NULL || hashSz < 32u || r == NULL || s == NULL) { + hash == NULL || hashSz == 0u || r == NULL || s == NULL) { return BAD_FUNC_ARG; } + /* Normalize the digest to the 32-byte P-256 field: a longer hash is + * truncated to its leftmost 32 bytes, a shorter one (SHA-224/SHA-1) is + * left-padded with leading zeros -- matching the seed-DHUK Stm32Dhuk_Sign + * path so both transparent-sign paths accept sub-32-byte digests. */ + XMEMSET(hb, 0, sizeof(hb)); + if (hashSz >= 32u) { + XMEMCPY(hb, hash, 32); + } + else { + XMEMCPY(hb + (32u - hashSz), hash, hashSz); + } XMEMCPY(ivw, iv, sizeof(ivw)); XMEMCPY(tagw, tag, sizeof(tagw)); XMEMCPY(wrapw, wrapped, sizeof(wrapw)); @@ -4443,13 +4510,14 @@ int wc_Stm32_Ccb_EccSign(int curveId, const byte* iv, const byte* tag, if (ret != 0) { return ret; } - /* CCOP = ECDSA blob-use sign; load the DHUK blob key in decrypt (use) mode. */ + /* CCOP = ECDSA blob-use sign; load the DHUK blob key in decrypt (use) + * mode. */ ret = Stm32Ccb_OpBegin(WC_CCB_OP_SIGN_USE, WC_CCB_PKA_SIGN_MODE); if (ret != 0) { goto done; } if ((ret = Stm32Ccb_LoadBlobKey(1 /* use */)) != 0) { goto done; } /* Hash -> PKA RAM (plain BE->LE words, no terminator, not yet coupled). */ - Stm32Ccb_LoadHash(hash); + Stm32Ccb_LoadHash(hb); /* Blob-use initial phase: load IV, GCM init, write reference tag. 
*/ Stm32Ccb_LoadIv(ivw); @@ -4457,7 +4525,8 @@ int wc_Stm32_Ccb_EccSign(int curveId, const byte* iv, const byte* tag, if ((ret = Stm32Ccb_SaesWaitCcf()) != 0) { goto done; } Stm32Ccb_LoadRefTag(tagw); /* GCM header phase (keep EN) -> OPSTEP 0x12 -> 0x13. */ - SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | WC_STM32_AES_CR_PHASE_0 | AES_CR_EN; + SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | + WC_STM32_AES_CR_PHASE_0 | AES_CR_EN; if ((ret = Stm32Ccb_WaitOpStep(0x13u)) != 0) { goto done; } /* ECDSA curve params into PKA RAM (each coupled to SAES, wait CCF). */ @@ -4467,20 +4536,9 @@ int wc_Stm32_Ccb_EccSign(int curveId, const byte* iv, const byte* tag, SAES->CR = (SAES->CR & ~WC_STM32_AES_CR_PHASE) | WC_STM32_AES_CR_PHASE_1; if ((ret = Stm32Ccb_WaitOpStep(0x14u)) != 0) { goto done; } - /* Feed the wrapped scalar to SAES; the CCB substitutes the decrypted key - * into PKA RAM where the magic value is written (SAES->PKA chaining). */ - block = 0u; - for (off = 0u; off < cipsz; off++) { - SAES->DINR = wrapw[cipsz - 1u - off]; - if ((off % 4u) == 3u) { - if ((ret = Stm32Ccb_SaesWaitCcf()) != 0) { goto done; } - for (i = 0u; i < 4u; i++) { - WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D + block + i] = - WC_CCB_MAGIC; - } - block += 4u; - } - } + /* Feed the wrapped scalar into the private-key D slot. 
*/ + if ((ret = Stm32Ccb_FeedWrappedScalar(PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D, + wrapw, cipsz)) != 0) { goto done; } WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D + cipsz] = 0u; WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D + cipsz + 1u] = 0u; if ((ret = Stm32Ccb_PkaWaitFlag(PKA_SR_DATAOKF)) != 0) { goto done; } @@ -4510,8 +4568,10 @@ int wc_Stm32_Ccb_EccSign(int curveId, const byte* iv, const byte* tag, ret = WC_HW_E; goto done; } - wc_stm32_pka_read_be(r, &WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_OUT_SIGNATURE_R], 32u); - wc_stm32_pka_read_be(s, &WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_OUT_SIGNATURE_S], 32u); + wc_stm32_pka_read_be(r, + &WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_OUT_SIGNATURE_R], 32u); + wc_stm32_pka_read_be(s, + &WC_CCB_PKA_RAMW[PKA_ECDSA_SIGN_OUT_SIGNATURE_S], 32u); ret = 0; done: @@ -4717,10 +4777,16 @@ void wc_Stm32_Aes_Cleanup(void) #endif /* WOLFSSL_STM32_BARE / WOLFSSL_STM32_CUBEMX / StdPeriph */ -/* CubeMX/HAL CCB ECDSA port -- placed after the build-branch structure and +/* CubeMX/HAL DHUK + CCB port -- placed after the build-branch structure and * guarded on WOLFSSL_STM32_CUBEMX so it compiles only for the HAL build (the - * BARE build provides its own wc_Stm32_Ccb_* above). */ -#if defined(WOLFSSL_STM32_CCB) && defined(WOLFSSL_STM32_CUBEMX) + * BARE build provides its own wc_Stm32_Ccb_* / DHUK above). The transparent + * DHUK symmetric+sign primitives drive ST's HAL_CRYPEx (SAES) / HAL_AES (C5) + * and HAL_PKA; the CCB-protected ECDSA primitives drive ST's HAL_CCB_*. A + * single crypto-callback device serves both, matching the bare build. */ +#if defined(WOLFSSL_STM32_CUBEMX) && \ + (defined(WOLFSSL_STM32_CCB) || defined(WC_STM32_HAS_DHUK)) + +#if defined(WOLFSSL_STM32_CCB) /* --------------------------------------------------------------------------- * CCB (Coupling and Chaining Bridge) ECDSA -- CubeMX/HAL path (STM32U3). * Implements the wolfSSL CCB port via ST's HAL_CCB_* driver. 
The DHUK is the @@ -4732,20 +4798,30 @@ void wc_Stm32_Aes_Cleanup(void) static const byte ccb_p256_aAbs[32] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x03}; static const byte ccb_p256_b[32] = { - 0x5a,0xc6,0x35,0xd8,0xaa,0x3a,0x93,0xe7,0xb3,0xeb,0xbd,0x55,0x76,0x98,0x86,0xbc, - 0x65,0x1d,0x06,0xb0,0xcc,0x53,0xb0,0xf6,0x3b,0xce,0x3c,0x3e,0x27,0xd2,0x60,0x4b}; + 0x5a,0xc6,0x35,0xd8,0xaa,0x3a,0x93,0xe7, + 0xb3,0xeb,0xbd,0x55,0x76,0x98,0x86,0xbc, + 0x65,0x1d,0x06,0xb0,0xcc,0x53,0xb0,0xf6, + 0x3b,0xce,0x3c,0x3e,0x27,0xd2,0x60,0x4b}; static const byte ccb_p256_p[32] = { - 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff}; + 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x01, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff}; static const byte ccb_p256_n[32] = { - 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xbc,0xe6,0xfa,0xad,0xa7,0x17,0x9e,0x84,0xf3,0xb9,0xca,0xc2,0xfc,0x63,0x25,0x51}; + 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xbc,0xe6,0xfa,0xad,0xa7,0x17,0x9e,0x84, + 0xf3,0xb9,0xca,0xc2,0xfc,0x63,0x25,0x51}; static const byte ccb_p256_Gx[32] = { - 0x6b,0x17,0xd1,0xf2,0xe1,0x2c,0x42,0x47,0xf8,0xbc,0xe6,0xe5,0x63,0xa4,0x40,0xf2, - 0x77,0x03,0x7d,0x81,0x2d,0xeb,0x33,0xa0,0xf4,0xa1,0x39,0x45,0xd8,0x98,0xc2,0x96}; + 0x6b,0x17,0xd1,0xf2,0xe1,0x2c,0x42,0x47, + 0xf8,0xbc,0xe6,0xe5,0x63,0xa4,0x40,0xf2, + 0x77,0x03,0x7d,0x81,0x2d,0xeb,0x33,0xa0, + 0xf4,0xa1,0x39,0x45,0xd8,0x98,0xc2,0x96}; static const byte ccb_p256_Gy[32] = { - 0x4f,0xe3,0x42,0xe2,0xfe,0x1a,0x7f,0x9b,0x8e,0xe7,0xeb,0x4a,0x7c,0x0f,0x9e,0x16, - 0x2b,0xce,0x33,0x57,0x6b,0x31,0x5e,0xce,0xcb,0xb6,0x40,0x68,0x37,0xbf,0x51,0xf5}; + 0x4f,0xe3,0x42,0xe2,0xfe,0x1a,0x7f,0x9b, + 0x8e,0xe7,0xeb,0x4a,0x7c,0x0f,0x9e,0x16, + 
0x2b,0xce,0x33,0x57,0x6b,0x31,0x5e,0xce, + 0xcb,0xb6,0x40,0x68,0x37,0xbf,0x51,0xf5}; static void Stm32Ccb_SetP256(CCB_ECDSACurveParamTypeDef* p) { @@ -4846,15 +4922,25 @@ int wc_Stm32_Ccb_EccSign(int curveId, const byte* iv, const byte* tag, uint32_t ivW[4]; uint32_t tagW[4]; uint32_t wrapW[8]; + byte hb[32]; int ret = 0; if (curveId != ECC_SECP256R1) { return NOT_COMPILED_IN; } if (iv == NULL || tag == NULL || wrapped == NULL || wrappedSz != 32u || - hash == NULL || hashSz < 32u || r == NULL || s == NULL) { + hash == NULL || hashSz == 0u || r == NULL || s == NULL) { return BAD_FUNC_ARG; } + /* Normalize the digest to 32 bytes (truncate longer, left-pad shorter), + * matching the bare CCB and seed-DHUK sign paths. */ + XMEMSET(hb, 0, sizeof(hb)); + if (hashSz >= 32u) { + XMEMCPY(hb, hash, 32); + } + else { + XMEMCPY(hb + (32u - hashSz), hash, hashSz); + } XMEMCPY(ivW, iv, sizeof(ivW)); XMEMCPY(tagW, tag, sizeof(tagW)); XMEMCPY(wrapW, wrapped, sizeof(wrapW)); @@ -4878,7 +4964,7 @@ int wc_Stm32_Ccb_EccSign(int curveId, const byte* iv, const byte* tag, blob.pWrappedKey = wrapW; sig.pRSign = r; sig.pSSign = s; - if (HAL_CCB_ECDSA_Sign(&hccb, &param, &wrap, &blob, (uint8_t*)hash, &sig) + if (HAL_CCB_ECDSA_Sign(&hccb, &param, &wrap, &blob, hb, &sig) != HAL_OK) { ret = WC_HW_E; } @@ -4886,54 +4972,762 @@ int wc_Stm32_Ccb_EccSign(int curveId, const byte* iv, const byte* tag, wolfSSL_CryptHwMutexUnLock(); return ret; } +#endif /* WOLFSSL_STM32_CCB */ -#if defined(WOLF_CRYPTO_CB) && defined(HAVE_ECC) && defined(HAVE_ECC_SIGN) -/* CubeMX CCB crypto-callback device. Transparent DHUK AES/GMAC is bare-only, so - * under the HAL build the CCB-protected ECDSA sign is the only transparent DHUK - * operation. This minimal device routes WC_PK_TYPE_ECDSA_SIGN for a CCB key - * (key->dhuk_is_ccb) to the HAL CCB sign and returns the DER-encoded (r,s); it - * mirrors the bare-metal device's CCB branch so the same wc_ecc_sign_hash flow - * works on both build paths.
*/ -static int Stm32Ccb_CryptoDevCb(int devId, struct wc_CryptoInfo* info, - void* ctx) +#if defined(WC_STM32_HAS_DHUK) +/* --------------------------------------------------------------------------- + * Transparent DHUK symmetric (AES-ECB/CBC, GMAC) -- CubeMX build. + * Derives a device-bound working key from a 256-bit seed inside SAES (the key + * never appears in software for symmetric ops), then runs AES with that key + * resident in KEYR. The SAES is driven through the SAME direct-register + * sequence the bare build uses (KMOD=WRAPPED, KEYSEL=HW key-derivation, then + * NORMAL-key ECB/CBC), NOT ST's HAL_CRYPEx_UnwrapKey. This is deliberate: + * HAL_CRYPEx_UnwrapKey derives a DIFFERENT (though valid and deterministic) + * device key for the same seed, so a seed/blob provisioned under one build + * would not be usable under the other. The register path is build-flavour + * agnostic (pure MMIO on identical silicon) and gives bit-identical results in + * the bare and CubeMX builds -- validated on STM32U385 (GMAC tag + AES-ECB + * ciphertext match across both builds). The HAL is still used for clock-tree, + * RNG seeding and (for ECDSA) PKA. STM32C5 (separate HAL_AES driver) is a + * follow-on. SAES register/flag names come from the CMSIS device header. + * ------------------------------------------------------------------------- */ + +#ifndef STM32_DHUK_TIMEOUT +#define STM32_DHUK_TIMEOUT 0x000FFFFFu +#endif + +/* AES modes for Stm32Dhuk_AesCubeMX. The bare definitions live in the BARE + * branch, which is not compiled on the CubeMX path. 
*/ +#ifndef WC_DHUK_MODE_ECB +#define WC_DHUK_MODE_ECB 0 +#endif +#ifndef WC_DHUK_MODE_CBC +#define WC_DHUK_MODE_CBC 1 +#endif + +#if defined(AES_ISR_CCF) && defined(AES_ICR_CCF) + #define WC_DHUK_CCF_PENDING() ((SAES->ISR & AES_ISR_CCF) == 0u) + #define WC_DHUK_CCF_CLEAR() do { SAES->ICR = AES_ICR_CCF; } while (0) +#elif defined(AES_SR_CCF) && defined(AES_CR_CCFC) + #define WC_DHUK_CCF_PENDING() ((SAES->SR & AES_SR_CCF) == 0u) + #define WC_DHUK_CCF_CLEAR() do { SAES->CR |= AES_CR_CCFC; } while (0) +#else + #error "STM32 CubeMX DHUK: SAES CCF flag macros not available for this part" +#endif + +/* Bring up the peripherals the SAES DHUK path needs in the CubeMX build: + * RNG (the SAES self-init pulls entropy from it; random.c's HAL_RNG_DeInit may + * have gated it off), the SAES kernel clock, then wait out the SAES self-init + * (SR.BUSY). Mirrors the bare Stm32SaesEnsureRng + WC_STM32_SAES_CLK_ENABLE + + * Stm32SaesWaitInit. */ +static int Stm32Dhuk_RegClkInit(void) { - ecc_key* key; - byte r[MAX_ECC_BYTES]; - byte s[MAX_ECC_BYTES]; - word32 sz; - int ret; + word32 spin; - (void)devId; - (void)ctx; - if (info == NULL || info->algo_type != WC_ALGO_TYPE_PK) { - return CRYPTOCB_UNAVAILABLE; +#ifdef __HAL_RCC_RNG_CLK_ENABLE + __HAL_RCC_RNG_CLK_ENABLE(); +#endif + if ((RNG->CR & RNG_CR_RNGEN) == 0u) { + RNG->CR |= RNG_CR_RNGEN; + __DMB(); } - /* Transparent provisioning: wc_ecc_make_key() on a WC_DHUK_DEVID key binds - * a fresh CCB-protected blob to it (no CCB-specific API). */ - if (info->pk.type == WC_PK_TYPE_EC_KEYGEN) { - return wc_ecc_dev_make_key(info->pk.eckg.rng, info->pk.eckg.size, - info->pk.eckg.key, info->pk.eckg.curveId); +#ifdef __HAL_RCC_SAES_CLK_ENABLE + __HAL_RCC_SAES_CLK_ENABLE(); +#endif +#ifdef __HAL_RCC_AES_CLK_ENABLE + __HAL_RCC_AES_CLK_ENABLE(); +#endif +#ifdef RCC_CR_SHSION + /* On STM32U5/U3 the SAES kernel clock is the SHSI (secure HSI); it must be + * running or the SAES IP never computes (CCF/BUSY never resolve). 
ST does + * this in HAL_CRYP_MspInit; mirror the bare Stm32SaesEnsureRng here. */ + if ((RCC->CR & RCC_CR_SHSION) == 0u) { + spin = 0u; + RCC->CR |= RCC_CR_SHSION; + while ((RCC->CR & RCC_CR_SHSIRDY) == 0u) { + if (++spin >= (word32)STM32_DHUK_TIMEOUT) { + break; + } + } + __DMB(); } - if (info->pk.type != WC_PK_TYPE_ECDSA_SIGN) { - return CRYPTOCB_UNAVAILABLE; +#endif + spin = 0u; + __DMB(); + while ((SAES->SR & AES_SR_BUSY) != 0u) { + if (++spin >= (word32)STM32_DHUK_TIMEOUT) { + return WC_TIMEOUT_E; + } } - key = info->pk.eccsign.key; - if (key == NULL || key->dhuk_is_ccb == 0u) { - return CRYPTOCB_UNAVAILABLE; + return 0; +} + +/* Derive the DHUK-bound working key into SAES KEYR from a 256-bit seed, via the + * direct register sequence (KMOD=WRAPPED, KEYSEL=HW, MODE=KEYDERIVATION then + * DECRYPT, push the seed; the key passes signal completion via SR.BUSY clearing + * plus SR.KEYVALID, NOT CCF). This is byte-identical to the bare + * Stm32SaesDeriveKeyFromSeed -- driving the SAES through the register sequence + * (rather than HAL_CRYPEx_UnwrapKey) makes the CubeMX build derive the SAME + * device key as the bare build for a given seed. On return EN is cleared and + * the derived key is resident in KEYR. Caller holds the crypto mutex. 
*/ +static int Stm32Dhuk_RegDerive(const byte* seed) +{ + word32 seedWords[8]; + word32 i; + word32 spin; + word32 cr; + int ret = 0; + + XMEMCPY(seedWords, seed, 32); + + WC_DHUK_CCF_CLEAR(); + cr = AES_CR_DATATYPE_1 | AES_CR_KEYSIZE | AES_CR_KMOD_0 | + AES_CR_KEYSEL_0 | AES_CR_MODE_0; /* WRAPPED, HW, KEYDERIVATION */ + SAES->CR = cr; + SAES->CR |= AES_CR_EN; + spin = 0u; + __DMB(); + while ((SAES->SR & AES_SR_BUSY) != 0u) { + if (++spin >= (word32)STM32_DHUK_TIMEOUT) { + ret = WC_TIMEOUT_E; + goto done; + } } - sz = (word32)wc_ecc_size(key); - ret = wc_Stm32_Ccb_EccSign(ECC_SECP256R1, key->ccb_iv, key->ccb_tag, - key->dhuk_wrapped_priv, - key->dhuk_wrapped_priv_len, - info->pk.eccsign.in, info->pk.eccsign.inlen, - r, s); - if (ret == 0) { - ret = wc_ecc_rs_raw_to_sig(r, sz, s, sz, - info->pk.eccsign.out, - info->pk.eccsign.outlen); + WC_DHUK_CCF_CLEAR(); + + cr = (SAES->CR & ~AES_CR_MODE) | AES_CR_MODE_1; /* MODE = DECRYPT */ + SAES->CR = cr; + SAES->CR |= AES_CR_EN; + for (i = 0; i < 8u; i += 4u) { + SAES->DINR = seedWords[i + 0u]; + SAES->DINR = seedWords[i + 1u]; + SAES->DINR = seedWords[i + 2u]; + SAES->DINR = seedWords[i + 3u]; + spin = 0u; + __DMB(); + while ((SAES->SR & AES_SR_BUSY) != 0u) { + if (++spin >= (word32)STM32_DHUK_TIMEOUT) { + ret = WC_TIMEOUT_E; + goto done; + } + } + WC_DHUK_CCF_CLEAR(); + } + if ((SAES->SR & AES_SR_KEYVALID) == 0u) { + ret = WC_HW_E; + goto done; + } + SAES->CR &= ~AES_CR_EN; + +done: + ForceZero(seedWords, sizeof(seedWords)); + return ret; +} + +/* Run one ECB block (4 words) through SAES with the key resident in KEYR: push + * DINR x4, wait CCF (data passes DO raise CCF), read DOUTR x4 in place, clear + * CCF. CR / EN must already be configured by the caller. Mirrors the bare + * Stm32SaesEcbBlock. 
*/ +static int Stm32Dhuk_RegEcbBlock(word32 buf[4]) +{ + word32 spin = 0u; + + SAES->DINR = buf[0]; + SAES->DINR = buf[1]; + SAES->DINR = buf[2]; + SAES->DINR = buf[3]; + while (WC_DHUK_CCF_PENDING()) { + if (++spin >= (word32)STM32_DHUK_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + buf[0] = SAES->DOUTR; + buf[1] = SAES->DOUTR; + buf[2] = SAES->DOUTR; + buf[3] = SAES->DOUTR; + WC_DHUK_CCF_CLEAR(); + return 0; +} + +/* AES-ECB / AES-CBC encrypt or decrypt with a DHUK-derived key, CubeMX build. + * Drives the SAES through the register sequence (Stm32Dhuk_RegDerive + + * Stm32Dhuk_RegEcbBlock) so the result is bit-identical to the bare + * Stm32Dhuk_Aes for a given seed. */ +static int Stm32Dhuk_AesCubeMX(const byte* seed, int mode, int enc, + const byte* in, word32 sz, byte* out, const byte* iv, word32 ivSz) +{ + word32 chmod; + word32 cr; + word32 i; + word32 blocks; + int locked = 0; + int ret; + + if (seed == NULL || in == NULL || out == NULL) { + return BAD_FUNC_ARG; + } + if (sz == 0u || (sz % WC_AES_BLOCK_SIZE) != 0u) { + return BAD_FUNC_ARG; } - ForceZero(r, sizeof(r)); - ForceZero(s, sizeof(s)); + if (mode == WC_DHUK_MODE_ECB) { + chmod = 0u; /* CHMOD = ECB */ + } + else if (mode == WC_DHUK_MODE_CBC) { + if (iv == NULL || ivSz != WC_AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + chmod = AES_CR_CHMOD_0; /* CHMOD = CBC */ + } + else { + return BAD_FUNC_ARG; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + locked = 1; + + ret = Stm32Dhuk_RegClkInit(); + if (ret != 0) { + goto exit; + } + ret = Stm32Dhuk_RegDerive(seed); + if (ret != 0) { + goto exit; + } + + /* ECB/CBC with the derived key now in KEYR (KMOD=NORMAL, KEYSEL=NORMAL). + * Decrypt needs a NORMAL-mode KEYDERIVATION prep pass first (this IS a + * data/compute pass and raises CCF, unlike the wrapped-key DHUK derive). 
*/ + cr = AES_CR_DATATYPE_1 | AES_CR_KEYSIZE | chmod; + if (!enc) { + word32 spin = 0u; + SAES->CR = cr | AES_CR_MODE_0; /* MODE = KEYDERIVATION */ + SAES->CR |= AES_CR_EN; + while (WC_DHUK_CCF_PENDING()) { + if (++spin >= (word32)STM32_DHUK_TIMEOUT) { + ret = WC_TIMEOUT_E; + goto exit; + } + } + WC_DHUK_CCF_CLEAR(); + SAES->CR &= ~AES_CR_EN; + cr |= AES_CR_MODE_1; /* MODE = DECRYPT */ + } + SAES->CR = cr; + + if (chmod == AES_CR_CHMOD_0) { + word32 v[4]; + XMEMSET(v, 0, sizeof(v)); + XMEMCPY(v, iv, WC_AES_BLOCK_SIZE); + ByteReverseWords(v, v, 16); + SAES->IVR3 = v[0]; + SAES->IVR2 = v[1]; + SAES->IVR1 = v[2]; + SAES->IVR0 = v[3]; + ForceZero(v, sizeof(v)); + } + + SAES->CR |= AES_CR_EN; + blocks = sz / WC_AES_BLOCK_SIZE; + for (i = 0; i < blocks; i++) { + word32 buf[4]; + XMEMCPY(buf, in + i * WC_AES_BLOCK_SIZE, WC_AES_BLOCK_SIZE); + ret = Stm32Dhuk_RegEcbBlock(buf); + if (ret != 0) { + ForceZero(buf, sizeof(buf)); + goto exit; + } + XMEMCPY(out + i * WC_AES_BLOCK_SIZE, buf, WC_AES_BLOCK_SIZE); + ForceZero(buf, sizeof(buf)); + } + SAES->CR &= ~AES_CR_EN; + ret = 0; + +exit: + SAES->CR &= ~AES_CR_EN; +#ifdef AES_CR_IPRST + SAES->CR |= AES_CR_IPRST; + __DSB(); + SAES->CR &= ~AES_CR_IPRST; +#endif + WC_DHUK_CCF_CLEAR(); + if (locked) { + wolfSSL_CryptHwMutexUnLock(); + } + return ret; +} + +/* GMAC tag with a DHUK-derived key, CubeMX build. Mirrors the bare + * Stm32Dhuk_Gmac: derive the key via registers, ECB-encrypt H = AES_Ek(0) and + * Ek_J0 = AES_Ek(J0), then GHASH-over-AAD and tag = GHASH XOR Ek_J0 in + * software. Bit-identical to the bare path for a given seed. 
*/ +static int Stm32Dhuk_GmacCubeMX(const byte* seed, const byte* iv, word32 ivSz, + const byte* aad, word32 aadSz, byte* tag, word32 tagSz) +{ + Gcm* gcmp; +#ifndef WOLFSSL_SMALL_STACK + Gcm gcm_stack; +#endif + byte J0[WC_AES_BLOCK_SIZE]; + byte Ek_J0[WC_AES_BLOCK_SIZE]; + byte Y[WC_AES_BLOCK_SIZE]; + word32 buf[4]; + word32 i; + word32 cr; + int locked = 0; + int ret; + + if (seed == NULL || iv == NULL || tag == NULL) { + return BAD_FUNC_ARG; + } + if (ivSz == 0u) { + return BAD_FUNC_ARG; + } + if (tagSz < 4u || tagSz > WC_AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + if (aad == NULL && aadSz > 0u) { + return BAD_FUNC_ARG; + } + +#ifdef WOLFSSL_SMALL_STACK + gcmp = (Gcm*)XMALLOC(sizeof(*gcmp), NULL, DYNAMIC_TYPE_AES); + if (gcmp == NULL) { + return MEMORY_E; + } +#else + gcmp = &gcm_stack; +#endif + XMEMSET(gcmp, 0, sizeof(*gcmp)); + XMEMSET(J0, 0, sizeof(J0)); + XMEMSET(Ek_J0, 0, sizeof(Ek_J0)); + XMEMSET(Y, 0, sizeof(Y)); + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + goto exit; + } + locked = 1; + + ret = Stm32Dhuk_RegClkInit(); + if (ret != 0) { + goto exit; + } + ret = Stm32Dhuk_RegDerive(seed); + if (ret != 0) { + goto exit; + } + + /* ECB-ENCRYPT with the derived key (KMOD/KEYSEL = NORMAL). */ + cr = AES_CR_DATATYPE_1 | AES_CR_KEYSIZE; /* NORMAL ECB */ + SAES->CR = cr; + SAES->CR |= AES_CR_EN; + + /* H = AES_Ek(0^128) */ + XMEMSET(buf, 0, sizeof(buf)); + ret = Stm32Dhuk_RegEcbBlock(buf); + if (ret != 0) { + ForceZero(buf, sizeof(buf)); + goto exit; + } + XMEMCPY(gcmp->H, buf, WC_AES_BLOCK_SIZE); +#if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT) + GenerateM0(gcmp); +#endif + ForceZero(buf, sizeof(buf)); + + /* J0: 12-byte IV fast path, else GHASH-J0 per NIST SP 800-38D. 
*/ + if (ivSz == 12u) { + XMEMCPY(J0, iv, 12); + J0[15] = 0x01; + } + else { + GHASH(gcmp, NULL, 0, iv, ivSz, J0, WC_AES_BLOCK_SIZE); + } + + /* Ek_J0 = AES_Ek(J0) */ + XMEMCPY(buf, J0, WC_AES_BLOCK_SIZE); + ret = Stm32Dhuk_RegEcbBlock(buf); + if (ret != 0) { + ForceZero(buf, sizeof(buf)); + goto exit; + } + XMEMCPY(Ek_J0, buf, WC_AES_BLOCK_SIZE); + ForceZero(buf, sizeof(buf)); + + SAES->CR &= ~AES_CR_EN; + + GHASH(gcmp, aad, aadSz, NULL, 0, Y, WC_AES_BLOCK_SIZE); + for (i = 0; i < WC_AES_BLOCK_SIZE; i++) { + Y[i] ^= Ek_J0[i]; + } + XMEMCPY(tag, Y, tagSz); + ret = 0; + +exit: + SAES->CR &= ~AES_CR_EN; +#ifdef AES_CR_IPRST + SAES->CR |= AES_CR_IPRST; + __DSB(); + SAES->CR &= ~AES_CR_IPRST; +#endif + WC_DHUK_CCF_CLEAR(); + ForceZero(Ek_J0, sizeof(Ek_J0)); + ForceZero(Y, sizeof(Y)); +#ifdef WOLFSSL_SMALL_STACK + if (gcmp != NULL) { + ForceZero(gcmp, sizeof(*gcmp)); + XFREE(gcmp, NULL, DYNAMIC_TYPE_AES); + } +#else + ForceZero(gcmp, sizeof(*gcmp)); +#endif + if (locked) { + wolfSSL_CryptHwMutexUnLock(); + } + return ret; +} + +/* Seed source: the per-key 256-bit seed reaches the device as the AES key bytes + * (aes->devKey, set by the normal wc_AesSetKey / wc_AesGcmSetKey), or NULL if + * not a 256-bit seed key. Mirrors the bare Stm32Dhuk_AesSeed. */ +static const byte* Stm32Dhuk_AesSeedCubeMX(Aes* aes) +{ + if (aes == NULL || aes->keylen != 32) { + return NULL; + } + return (const byte*)aes->devKey; +} + +/* Route a cipher (AES-ECB, AES-GCM/GMAC) request to the SAES backend. Mirrors + * the bare Stm32Dhuk_Cipher: ECB and GMAC only; CBC and full-payload GCM fail + * loud (NOT_COMPILED_IN), not CRYPTOCB_UNAVAILABLE, so wolfCrypt does not fall + * back to a SW path that would key off the seed (aes->key/devKey) instead of + * the SAES-derived device key. 
*/ +static int Stm32Dhuk_CipherCubeMX(struct wc_CryptoInfo* info) +{ + const byte* seed; + int ret; + + switch (info->cipher.type) { +#if defined(HAVE_AES_ECB) || defined(WOLFSSL_AES_DIRECT) || \ + defined(WOLF_CRYPTO_CB_ONLY_AES) + case WC_CIPHER_AES_ECB: + seed = Stm32Dhuk_AesSeedCubeMX(info->cipher.aesecb.aes); + if (seed == NULL) { + return CRYPTOCB_UNAVAILABLE; + } + return Stm32Dhuk_AesCubeMX(seed, WC_DHUK_MODE_ECB, info->cipher.enc, + info->cipher.aesecb.in, + info->cipher.aesecb.sz, + info->cipher.aesecb.out, NULL, 0); +#endif +#if defined(HAVE_AES_CBC) + case WC_CIPHER_AES_CBC: + /* Not dispatched in a real DHUK build (wc_AesCbcEncrypt/Decrypt are the + * public CBC entry points and reject a DHUK devId directly); fail loud + * as defense-in-depth. */ + (void)ret; + return NOT_COMPILED_IN; +#endif +#ifdef HAVE_AESGCM + case WC_CIPHER_AES_GCM: + if (info->cipher.enc) { + if (info->cipher.aesgcm_enc.sz != 0) { + return NOT_COMPILED_IN; /* GMAC only (empty plaintext) */ + } + seed = Stm32Dhuk_AesSeedCubeMX(info->cipher.aesgcm_enc.aes); + if (seed == NULL) { + return CRYPTOCB_UNAVAILABLE; + } + return Stm32Dhuk_GmacCubeMX(seed, + info->cipher.aesgcm_enc.iv, + info->cipher.aesgcm_enc.ivSz, + info->cipher.aesgcm_enc.authIn, + info->cipher.aesgcm_enc.authInSz, + info->cipher.aesgcm_enc.authTag, + info->cipher.aesgcm_enc.authTagSz); + } + else { + byte tag[WC_AES_BLOCK_SIZE]; + word32 tagSz = info->cipher.aesgcm_dec.authTagSz; + if (info->cipher.aesgcm_dec.sz != 0) { + return NOT_COMPILED_IN; + } + if (tagSz == 0 || tagSz > sizeof(tag)) { + return BAD_FUNC_ARG; + } + seed = Stm32Dhuk_AesSeedCubeMX(info->cipher.aesgcm_dec.aes); + if (seed == NULL) { + return CRYPTOCB_UNAVAILABLE; + } + XMEMSET(tag, 0, sizeof(tag)); + ret = Stm32Dhuk_GmacCubeMX(seed, + info->cipher.aesgcm_dec.iv, + info->cipher.aesgcm_dec.ivSz, + info->cipher.aesgcm_dec.authIn, + info->cipher.aesgcm_dec.authInSz, + tag, tagSz); + if (ret != 0) { + ForceZero(tag, sizeof(tag)); + return ret; + } + ret 
= ConstantCompare(tag, info->cipher.aesgcm_dec.authTag, + (int)tagSz); + ForceZero(tag, sizeof(tag)); + return (ret == 0) ? 0 : AES_GCM_AUTH_E; + } +#endif + default: + return CRYPTOCB_UNAVAILABLE; + } +} + +#if defined(WOLFSSL_STM32_PKA) && defined(HAVE_ECC) && defined(HAVE_ECC_SIGN) +/* Forward decls: PKA curve-parameter converters defined later in the + * WOLFSSL_STM32_PKA section of this file. */ +static int stm32_get_from_hexstr(const char* hex, uint8_t* dst, int sz); +static int stm32_getabs_from_hexstr(const char* hex, uint8_t* dst, int sz, + uint32_t *abs_sign); +static int stm32_get_from_mp_int(uint8_t *dst, const mp_int *a, int sz); + +/* Seed-DHUK ECDSA sign, CubeMX build. Recovers the wrapped private scalar with + * the SAES-derived device key (Stm32Dhuk_AesCubeMX ECB-decrypt -- the register + * path, bit-identical to bare, so the scalar matches the bare/provisioned key) + * into a short-lived buffer, then signs with the HW PKA via HAL_PKA_ECDSASign + * and DER-encodes (r,s). Mirrors the bare Stm32Dhuk_Sign. 
*/ +static int Stm32Dhuk_SignCubeMX(ecc_key* key, const byte* hash, word32 hashLen, + byte* sig, word32* sigLen, WC_RNG* rng) +{ + PKA_ECDSASignInTypeDef pka_ecc; + PKA_ECDSASignOutTypeDef pka_ecc_out; + mp_int gen_k; + mp_int order_mp; + mp_int r; + mp_int s; + uint8_t Keybin[STM32_MAX_ECC_SIZE]; + uint8_t Intbin[STM32_MAX_ECC_SIZE]; + uint8_t Rbin[STM32_MAX_ECC_SIZE]; + uint8_t Sbin[STM32_MAX_ECC_SIZE]; + uint8_t Hashbin[STM32_MAX_ECC_SIZE]; + uint8_t prime[STM32_MAX_ECC_SIZE]; + uint8_t coefA[STM32_MAX_ECC_SIZE]; +#ifdef WOLFSSL_STM32_PKA_V2 + uint8_t coefB[STM32_MAX_ECC_SIZE]; +#endif + uint8_t gen_x[STM32_MAX_ECC_SIZE]; + uint8_t gen_y[STM32_MAX_ECC_SIZE]; + uint8_t order[STM32_MAX_ECC_SIZE]; + uint32_t coefA_sign = 1; + int size; + int status; + + XMEMSET(&pka_ecc, 0, sizeof(pka_ecc)); + XMEMSET(&pka_ecc_out, 0, sizeof(pka_ecc_out)); + + if (key == NULL || sig == NULL || sigLen == NULL || hash == NULL || + rng == NULL || key->dp == NULL) { + return ECC_BAD_ARG_E; + } + if (key->dhuk_seed_sz != 32u) { + return BAD_FUNC_ARG; + } + if (key->dhuk_wrapped_priv_len == 0u || + (key->dhuk_wrapped_priv_len % 16u) != 0u || + key->dhuk_wrapped_priv_len > (word32)STM32_MAX_ECC_SIZE) { + return BAD_FUNC_ARG; + } + size = wc_ecc_size(key); + if ((int)key->dhuk_plain_priv_len != size) { + return BAD_FUNC_ARG; + } + + XMEMSET(Keybin, 0, sizeof(Keybin)); + + /* Recover the scalar: ECB-decrypt the wrapped blob with the DHUK-derived + * key (register SAES path). */ + status = Stm32Dhuk_AesCubeMX(key->dhuk_seed, WC_DHUK_MODE_ECB, 0, + key->dhuk_wrapped_priv, + key->dhuk_wrapped_priv_len, Keybin, NULL, 0); + if (status != 0) { + ForceZero(Keybin, sizeof(Keybin)); + return status; + } + + /* Curve parameters for PKA. 
*/ + status = stm32_get_from_hexstr(key->dp->prime, prime, size); + if (status == MP_OKAY) + status = stm32_get_from_hexstr(key->dp->order, order, size); + if (status == MP_OKAY) + status = stm32_get_from_hexstr(key->dp->Gx, gen_x, size); + if (status == MP_OKAY) + status = stm32_get_from_hexstr(key->dp->Gy, gen_y, size); + if (status == MP_OKAY) + status = stm32_getabs_from_hexstr(key->dp->Af, coefA, size, + &coefA_sign); +#ifdef WOLFSSL_STM32_PKA_V2 + if (status == MP_OKAY) + status = stm32_get_from_hexstr(key->dp->Bf, coefB, size); +#endif + if (status != MP_OKAY) { + ForceZero(Keybin, sizeof(Keybin)); + return status; + } + + /* Random per-sign "k". */ + mp_init(&gen_k); + mp_init(&order_mp); + status = mp_read_unsigned_bin(&order_mp, order, size); + if (status == MP_OKAY) + status = wc_ecc_gen_k(rng, size, &gen_k, &order_mp); + if (status == MP_OKAY) + status = stm32_get_from_mp_int(Intbin, &gen_k, size); + mp_clear(&gen_k); + mp_clear(&order_mp); + if (status != MP_OKAY) { + ForceZero(Keybin, sizeof(Keybin)); + ForceZero(Intbin, sizeof(Intbin)); + return status; + } + + pka_ecc.primeOrderSize = size; + pka_ecc.modulusSize = size; + pka_ecc.coefSign = coefA_sign; + pka_ecc.coef = coefA; +#ifdef WOLFSSL_STM32_PKA_V2 + pka_ecc.coefB = coefB; +#endif + pka_ecc.modulus = prime; + pka_ecc.basePointX = gen_x; + pka_ecc.basePointY = gen_y; + pka_ecc.primeOrder = order; + + XMEMSET(Hashbin, 0, sizeof(Hashbin)); + if (hashLen > (word32)STM32_MAX_ECC_SIZE) { + ForceZero(Keybin, sizeof(Keybin)); + ForceZero(Intbin, sizeof(Intbin)); + return ECC_BAD_ARG_E; + } + else if ((int)hashLen > size) { + XMEMCPY(Hashbin, hash, size); + } + else { + XMEMCPY(Hashbin + (size - hashLen), hash, hashLen); + } + pka_ecc.hash = Hashbin; + pka_ecc.integer = Intbin; + pka_ecc.privateKey = Keybin; + pka_ecc_out.RSign = Rbin; + pka_ecc_out.SSign = Sbin; + + status = HAL_PKA_ECDSASign(&hpka, &pka_ecc, HAL_MAX_DELAY); + if (status != HAL_OK) { + HAL_PKA_RAMReset(&hpka); + ForceZero(Keybin, 
sizeof(Keybin)); + ForceZero(Intbin, sizeof(Intbin)); + return WC_HW_E; + } + HAL_PKA_ECDSASign_GetResult(&hpka, &pka_ecc_out, NULL); + HAL_PKA_RAMReset(&hpka); + + /* DER-encode (r, s) into the caller's signature buffer. */ + mp_init(&r); + mp_init(&s); + status = mp_read_unsigned_bin(&r, Rbin, size); + if (status == MP_OKAY) + status = mp_read_unsigned_bin(&s, Sbin, size); + if (status == MP_OKAY) + status = StoreECC_DSA_Sig(sig, sigLen, &r, &s); + mp_clear(&r); + mp_clear(&s); + + ForceZero(Keybin, sizeof(Keybin)); + ForceZero(Intbin, sizeof(Intbin)); + return status; +} +#endif /* WOLFSSL_STM32_PKA && HAVE_ECC && HAVE_ECC_SIGN */ +#endif /* WC_STM32_HAS_DHUK */ + +#if defined(WOLF_CRYPTO_CB) && defined(HAVE_ECC) && defined(HAVE_ECC_SIGN) +/* CubeMX DHUK/CCB crypto-callback device. Routes transparent DHUK AES/GMAC (the + * symmetric path) to the HAL SAES primitives, EC keygen to the CCB blob-create, + * and ECDSA sign either to the HW-protected CCB sign (key->dhuk_is_ccb) or the + * seed-DHUK sign. Mirrors the bare-metal device so the same wc_Aes* / + * wc_ecc_sign_hash flow works on both build paths. */ +static int Stm32_CubeMX_CryptoDevCb(int devId, struct wc_CryptoInfo* info, + void* ctx) +{ +#if defined(WOLFSSL_STM32_CCB) + ecc_key* key; + byte r[MAX_ECC_BYTES]; + byte s[MAX_ECC_BYTES]; + word32 sz; +#endif + int ret; + + (void)devId; + (void)ctx; + if (info == NULL) { + return CRYPTOCB_UNAVAILABLE; + } + +#if defined(WC_STM32_HAS_DHUK) + if (info->algo_type == WC_ALGO_TYPE_CIPHER) { + return Stm32Dhuk_CipherCubeMX(info); + } +#endif + + if (info->algo_type != WC_ALGO_TYPE_PK) { + return CRYPTOCB_UNAVAILABLE; + } + +#if defined(WOLFSSL_STM32_CCB) + /* Transparent provisioning: wc_ecc_make_key() on a WC_DHUK_DEVID key binds + * a fresh CCB-protected blob to it (no CCB-specific API). 
*/ + if (info->pk.type == WC_PK_TYPE_EC_KEYGEN) { + return wc_ecc_dev_make_key(info->pk.eckg.rng, info->pk.eckg.size, + info->pk.eckg.key, info->pk.eckg.curveId); + } +#endif + if (info->pk.type != WC_PK_TYPE_ECDSA_SIGN) { + return CRYPTOCB_UNAVAILABLE; + } + +#if defined(WOLFSSL_STM32_CCB) + key = info->pk.eccsign.key; + if (key != NULL && key->dhuk_is_ccb != 0u) { + sz = (word32)wc_ecc_size(key); + ret = wc_Stm32_Ccb_EccSign(ECC_SECP256R1, key->ccb_iv, key->ccb_tag, + key->dhuk_wrapped_priv, + key->dhuk_wrapped_priv_len, + info->pk.eccsign.in, info->pk.eccsign.inlen, + r, s); + if (ret == 0) { + ret = wc_ecc_rs_raw_to_sig(r, sz, s, sz, + info->pk.eccsign.out, + info->pk.eccsign.outlen); + } + ForceZero(r, sizeof(r)); + ForceZero(s, sizeof(s)); + return ret; + } +#endif /* WOLFSSL_STM32_CCB */ + + /* Seed-DHUK ECDSA sign (non-CCB key): recover the wrapped scalar via the + * SAES register path and sign with the HW PKA. */ +#if defined(WC_STM32_HAS_DHUK) && defined(WOLFSSL_STM32_PKA) + { + ecc_key* skey = info->pk.eccsign.key; + if (skey != NULL && skey->dhuk_seed_sz == 32u) { + return Stm32Dhuk_SignCubeMX(skey, info->pk.eccsign.in, + info->pk.eccsign.inlen, info->pk.eccsign.out, + info->pk.eccsign.outlen, info->pk.eccsign.rng); + } + } +#endif + ret = CRYPTOCB_UNAVAILABLE; return ret; } @@ -4941,7 +5735,7 @@ static int Stm32Ccb_CryptoDevCb(int devId, struct wc_CryptoInfo* info, * name and contract as the bare-metal version so callers are build-agnostic. 
*/ int wc_Stm32_DhukRegister(int devId) { - return wc_CryptoCb_RegisterDevice(devId, Stm32Ccb_CryptoDevCb, NULL); + return wc_CryptoCb_RegisterDevice(devId, Stm32_CubeMX_CryptoDevCb, NULL); } void wc_Stm32_DhukUnRegister(int devId) @@ -4949,7 +5743,7 @@ void wc_Stm32_DhukUnRegister(int devId) wc_CryptoCb_UnRegisterDevice(devId); } #endif /* WOLF_CRYPTO_CB && HAVE_ECC && HAVE_ECC_SIGN */ -#endif /* WOLFSSL_STM32_CCB && WOLFSSL_STM32_CUBEMX */ +#endif /* WOLFSSL_STM32_CUBEMX && (WOLFSSL_STM32_CCB || WC_STM32_HAS_DHUK) */ #endif /* !NO_AES */ #endif /* STM32_CRYPTO */ diff --git a/wolfcrypt/src/random.c b/wolfcrypt/src/random.c index de5a72ffb22..2b5ccdcf65b 100644 --- a/wolfcrypt/src/random.c +++ b/wolfcrypt/src/random.c @@ -4251,13 +4251,16 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) } #elif defined(STM32_RNG) - /* Generate a RNG seed using the hardware random number generator - * on the STM32F2/F4/F7/L4. */ + /* Generate a RNG seed using the STM32 hardware RNG. Covers the STM32 + * families that carry an RNG IP block -- via the CubeMX HAL or the + * bare-metal direct-register backend (incl. the C5 NIST init and a + * generic bounded retry) -- not just the original F2/F4/F7/L4. */ #include /* Pulls in WC_STM32_RNG_CLK_ENABLE for WOLFSSL_STM32_BARE builds */ #ifdef WC_STM32_RNG_DIAG - /* The WC_STM32_RNG_DIAG paths below use printf(); pull in stdio.h so the - * file compiles on strict C99+ toolchains when diagnostics are enabled. */ + /* The WC_STM32_RNG_DIAG paths below use printf(); pull in stdio.h so + * the file compiles on strict C99+ toolchains when diagnostics are + * enabled. */ #include <stdio.h> #endif @@ -4473,8 +4476,9 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) } #endif - /* (No early SECS/CECS bail here unless WOLFSSL_STM32_RNG_LEGACY_FAILFAST - * is defined, above.) The HAL doesn't check error status immediately + /* (No early SECS/CECS bail here unless + * WOLFSSL_STM32_RNG_LEGACY_FAILFAST is defined, above.)
The HAL + * doesn't check error status immediately * after RNGEN -- the IP needs a few cycles after enable for the first * seed pull, and a transient SEIS/SECS can latch and resolve itself * through the auto-reset that the retry loop below already handles. diff --git a/wolfssl/wolfcrypt/ecc.h b/wolfssl/wolfcrypt/ecc.h index 970ec1d9be2..8a176a21f7b 100644 --- a/wolfssl/wolfcrypt/ecc.h +++ b/wolfssl/wolfcrypt/ecc.h @@ -763,9 +763,12 @@ int wc_ecc_sign_hash(const byte* in, word32 inlen, byte* out, word32 *outlen, WOLFSSL_API int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng, ecc_key* key, mp_int *r, mp_int *s); -#if defined(WOLFSSL_DHUK) && defined(WOLFSSL_STM32_BARE) && \ - defined(WC_STM32_HAS_DHUK) && \ - defined(WOLFSSL_STM32_PKA) && !defined(WC_STM32_PKA_VERIFY_ONLY) +#if defined(WOLFSSL_DHUK) && \ + (defined(WOLFSSL_STM32_BARE) || defined(WOLFSSL_STM32_CUBEMX)) +/* Gated on user-visible macros only (WOLFSSL_DHUK + the build-flavor), NOT the + * port-internal WC_STM32_HAS_DHUK -- that one is defined in port/st/stm32.h, + * which ecc.h does not include, so a WC_STM32_HAS_DHUK guard here would leave + * the prototype invisible to ecc.h consumers and to the definition's own TU. */ /* DHUK ECC sign: import a hardware-wrapped ECC private scalar + its derivation * seed onto the ecc_key for the crypto-callback sign path. The caller MUST also * populate key->pubkey (via wc_ecc_import_x963) so verify can use the diff --git a/wolfssl/wolfcrypt/port/st/stm32.h b/wolfssl/wolfcrypt/port/st/stm32.h index 0f9507f9824..31a2ded7b0d 100644 --- a/wolfssl/wolfcrypt/port/st/stm32.h +++ b/wolfssl/wolfcrypt/port/st/stm32.h @@ -813,11 +813,11 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, /* Family gate: only families that actually have SAES + DHUK silicon. * L5 has a "secure AES" instance but its CR layout does not include * KMOD / KEYSEL fields -- it does not implement the same DHUK key- - * wrap protocol as U5/U3/H5/WBA/C5. 
L5 is intentionally excluded. */ + * wrap protocol as U5/U3/H5/WBA/C5/N6. L5 is intentionally excluded. */ #if defined(WOLFSSL_DHUK) && \ (defined(WOLFSSL_STM32U5) || defined(WOLFSSL_STM32U3) || \ defined(WOLFSSL_STM32H5) || defined(WOLFSSL_STM32WBA) || \ - defined(WOLFSSL_STM32C5) || defined(WOLFSSL_STM32H7S)) + defined(WOLFSSL_STM32C5) || defined(WOLFSSL_STM32N6)) #define WC_STM32_HAS_DHUK #endif @@ -874,10 +874,10 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, * via ST's HAL_CCB_* driver). Requires CCB silicon (STM32U3 or STM32C5). */ #if defined(WOLFSSL_STM32_CCB) #if !defined(WOLFSSL_STM32_BARE) && !defined(WOLFSSL_STM32_CUBEMX) - #error "WOLFSSL_STM32_CCB requires WOLFSSL_STM32_BARE or WOLFSSL_STM32_CUBEMX" + #error "WOLFSSL_STM32_CCB requires WOLFSSL_STM32_BARE or CUBEMX" #endif #if !defined(WC_STM32_HAS_CCB) - #error "WOLFSSL_STM32_CCB requires CCB silicon (STM32U3/U385 or STM32C5/C5A3)" + #error "WOLFSSL_STM32_CCB requires CCB silicon (STM32U3 or STM32C5)" #endif #endif @@ -949,10 +949,12 @@ int stm32_ecc_sign_hash_ex(const byte* hash, word32 hashlen, struct WC_RNG* rng, #endif /* WOLFSSL_STM32_BARE && WC_STM32_HAS_DHUK */ -/* CubeMX CCB build: DHUK AES/GMAC is bare-only, but the CCB-protected ECDSA - * sign routes through the crypto-callback device too, so expose the same - * register/unregister entry points under the HAL build. */ -#if defined(WOLFSSL_STM32_CUBEMX) && defined(WOLFSSL_STM32_CCB) && \ +/* CubeMX build: the DHUK crypto-callback device (transparent AES/GMAC and + * seed-based ECDSA sign via the SAES register path, plus CCB sign/keygen where + * the CCB peripheral is present) registers through the same entry points as the + * bare build. */ +#if defined(WOLFSSL_STM32_CUBEMX) && \ + (defined(WC_STM32_HAS_DHUK) || defined(WOLFSSL_STM32_CCB)) && \ defined(WOLF_CRYPTO_CB) int wc_Stm32_DhukRegister(int devId); void wc_Stm32_DhukUnRegister(int devId);