Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 123 additions & 0 deletions .github/workflows/generate-structured-token.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Manually dispatched workflow that assembles a structured token of the form
#   <system_id>_<env_id>_<domain_purpose>_<entropy>_<checksum>
# and publishes it to the job summary.
#
# Every user-controlled value is handed to the shell through `env:` instead of
# being interpolated into the script text with `${{ }}`. Interpolated text is
# parsed as shell code (a quote or `$(...)` in an input would execute), whereas
# the value of an environment variable is never re-parsed.
#
# NOTE(review): this layout (64 hex characters of entropy and a `_` before the
# checksum) differs from the layout described in README.md (24 Base62 characters,
# checksum appended without a separator) — confirm which one is intended.
name: Generate Structured Token


on:
  workflow_dispatch:
    inputs:
      system_id:
        description: 'System abbreviation for the token.'
        required: true
        type: string
      environment:
        description: 'Target deployment environment for the token.'
        required: true
        type: choice
        options:
          - Preview
          - Production
      domain_purpose:
        description: 'Domain purpose abbreviation for the token.'
        required: true
        type: string


jobs:
  generate_token:
    name: Generate Token
    runs-on: ubuntu-latest
    steps:
      # Rejects empty identifiers and identifiers containing `_` or a space, then
      # maps the chosen environment to its short id (`prev` / `prod`).
      - name: 🔍 Validate Inputs
        id: validate_inputs
        env:
          SYSTEM_ID: ${{ inputs.system_id }}
          DOMAIN_PURPOSE: ${{ inputs.domain_purpose }}
          ENVIRONMENT: ${{ inputs.environment }}
        run: |
          if [[ -z "$SYSTEM_ID" ]]; then
            echo "::error::System identifier must not be empty."

            exit 1
          fi

          if [[ -z "$DOMAIN_PURPOSE" ]]; then
            echo "::error::Domain purpose identifier must not be empty."

            exit 1
          fi

          if [[ "$SYSTEM_ID" =~ [_\ ] ]]; then
            echo "::error::System identifier must not contain underscores or spaces."

            exit 1
          fi

          if [[ "$DOMAIN_PURPOSE" =~ [_\ ] ]]; then
            echo "::error::Domain purpose identifier must not contain underscores or spaces."

            exit 1
          fi

          if [ "$ENVIRONMENT" = 'Preview' ]; then
            echo "env_id=prev" >> "$GITHUB_OUTPUT"
          else
            echo "env_id=prod" >> "$GITHUB_OUTPUT"
          fi

      # Builds the token body, derives its checksum, and exposes every part as a
      # step output.
      - name: 🎲 Generate Token
        id: generate_token
        env:
          SYSTEM_ID: ${{ inputs.system_id }}
          ENV_ID: ${{ steps.validate_inputs.outputs.env_id }}
          DOMAIN_PURPOSE: ${{ inputs.domain_purpose }}
        run: |
          # Generate 256-bit high-strength random entropy.
          ENTROPY=$(openssl rand -hex 32)

          # Assemble token body (without checksum).
          TOKEN_BODY="${SYSTEM_ID}_${ENV_ID}_${DOMAIN_PURPOSE}_${ENTROPY}"

          # Compute CRC32 checksum encoded as 6-char base62.
          # Base62 alphabet: 0-9 A-Z a-z; 62^6 > 2^32 so the full CRC32 fits losslessly.
          CHECKSUM=$(python3 - "$TOKEN_BODY" << 'PYEOF'
          import binascii, sys
          ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
          data = sys.argv[1]
          crc = binascii.crc32(data.encode("utf-8")) & 0xFFFFFFFF
          result = []

          # Digits are produced least-significant first, hence the reversal below.
          for _ in range(6):
              result.append(ALPHABET[crc % 62])
              crc //= 62
          print("".join(reversed(result)))

          PYEOF
          )

          FINAL_TOKEN="${TOKEN_BODY}_${CHECKSUM}"

          echo "system_id=${SYSTEM_ID}" >> "$GITHUB_OUTPUT"
          echo "env_id=${ENV_ID}" >> "$GITHUB_OUTPUT"
          echo "domain_purpose=${DOMAIN_PURPOSE}" >> "$GITHUB_OUTPUT"
          echo "entropy=${ENTROPY}" >> "$GITHUB_OUTPUT"
          echo "checksum=${CHECKSUM}" >> "$GITHUB_OUTPUT"
          echo "final_token=${FINAL_TOKEN}" >> "$GITHUB_OUTPUT"

      # Renders the token and its components into the job summary. The heredoc is
      # unquoted so that the variables expand; expanded values are not re-scanned
      # by the shell, so their content cannot run as a command.
      - name: '📝 Write Summary'
        env:
          SYSTEM_ID: ${{ steps.generate_token.outputs.system_id }}
          ENV_ID: ${{ steps.generate_token.outputs.env_id }}
          DOMAIN_PURPOSE: ${{ steps.generate_token.outputs.domain_purpose }}
          FINAL_TOKEN: ${{ steps.generate_token.outputs.final_token }}
          CHECKSUM: ${{ steps.generate_token.outputs.checksum }}
        run: |
          cat >> "$GITHUB_STEP_SUMMARY" << EOF
          ## 🔑 Generated Structured Token

          | Field | Value |
          |---|---|
          | System ID | \`${SYSTEM_ID}\` |
          | Environment ID | \`${ENV_ID}\` |
          | Domain Purpose ID | \`${DOMAIN_PURPOSE}\` |
          | Entropy | \`(masked — 256-bit random)\` |
          | CRC32 Checksum (base62) | \`${CHECKSUM}\` |

          \`\`\`
          ${FINAL_TOKEN}
          \`\`\`
          EOF
11 changes: 11 additions & 0 deletions .github/workflows/prepare-release.yml
Original file line number Diff line number Diff line change
@@ -1,16 +1,27 @@
# Runs the release-preparation job whenever a version tag is pushed.
name: Prepare Release


on:
push:
tags:
# `v*` matches plain version tags; `*/v*` matches tags with a prefix segment
# before the version (presumably per-package tags such as `go/v1.0.0` — confirm).
- 'v*'
- '*/v*'


# Grants the workflow token write access to repository contents and pull requests.
permissions:
contents: write
pull-requests: write


jobs:
call-prepare:
# Delegates the whole job to a reusable workflow in leoweyr/github-release-workflow.
# NOTE(review): `@develop` is a mutable branch ref — consider pinning a tag or commit SHA.
uses: leoweyr/github-release-workflow/.github/workflows/reusable-prepare-release.yml@develop

with:
# JSON object passed to the reusable workflow as a string; presumably maps a
# package name to its directory — verify against the reusable workflow's inputs.
packages: |
{
"go": "go"
}

secrets:
# Hands the automatically provided GITHUB_TOKEN to the called workflow.
ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
120 changes: 120 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,123 @@
# Tokenforge

One spec, every language — context-aware credential architecture for generating and verifying structured tokens with byte-identical layout and CRC32 tail checksums.

```
[SystemIdentifier]_[EnvironmentIdentifier]_[DomainPurposeIdentifier]_[Entropy][Checksum]
```

| Segment | Content | Width |
|------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------|
| Prefix | System, environment, domain purpose identifiers, delimited by `_`<br />Restricts every semantic component to lowercase ASCII letters and decimal digits only. Underscores, uppercase letters, and any multi-byte non-ASCII characters are strictly forbidden.<br />`PREFIX_CHARSET = [a-z0-9]` | Variable (≥ 6) |
| High-intensity Entropy | 24 Base62 characters<br />Strictly the 62-character GMP dictionary, ordered by ASCII code in ascending order. Third-party Base62 dialects that embed internal permutations are prohibited.<br />`ALPHABET = 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz` | 24 |
| Tail Checksum | 6 Base62 characters encoding CRC32-IEEE of Prefix + Entropy | 6 |

**Physical Seamless Fusion** — No separator is permitted between the high-intensity entropy and the tail checksum. The full-stack slice cursor operates as a hard asymmetric dead-lock: the last 6 characters are always the checksum, everything before them is always the base string.

**Absolute Unambiguous Prefix** — Each semantic component of the prefix is subject to strict character-set constraints that fundamentally eliminate cross-platform parsing ambiguity and the hash inconsistency introduced by Unicode normalization (NFC/NFD).

**Unbiased Uniform Sampling** — Direct modulo on a low-bit-width integer is prohibited across all platforms. Every random index must be drawn from an OS-level cryptographically secure random number generator (CSPRNG) via rejection sampling, completely eliminating modulo bias.

**Structured Threshold Assertion** — Perimeter length guards discard hard-coded magic numbers. Minimum valid lengths are derived dynamically from the credential's own topology formula.

## 🚀 Quick Start

### Go

```bash
go get go.leoweyr.com/tokenforge/go
```

## 🏗️ Generation Pipeline

### 1. Prefix Construction

Concatenate the plaintext semantic segments for the given use case, terminating each segment — including the last — with `_`. Prefix length is variable and determined by system design, but must conform to this fixed topology.

*Example*: `odc_prod_msk_`

### 2. Unbiased Entropy Generation

Draw 24 characters from `ALPHABET` using an OS-level CSPRNG with a rejection-sampling loop to ensure each index is drawn uniformly from `[0, 61]`. Direct modulo on a raw random integer is forbidden — it introduces a statistical skew that taints the distribution. The output is a strictly fixed 24-character string.

*Example*: `7xT2zP9qL4wK1mN8vV5cB3nA`

### 3. Base String Concatenation

Concatenate the prefix from step 1 and the high-intensity entropy string from step 2 directly:

$$
\text{BaseString} = \text{Prefix} + \text{HighIntensityEntropy}
$$

*Example*: `odc_prod_msk_7xT2zP9qL4wK1mN8vV5cB3nA`

### 4. Mathematical CRC32 Mapping

Compute the CRC32-IEEE (reflected form) checksum of `BaseString` encoded as a UTF-8 byte stream, yielding a 32-bit unsigned integer $\text{Value}$. Convert $\text{Value}$ to a 6-character Base62 string $\text{Checksum}$ using the following right-to-left modulo loop:

$$
\begin{array}{l}
\text{for } i = 5 \rightarrow 0: \\
\quad \text{Remainder} = \text{Value} \bmod 62 \\
\quad \text{Checksum}[i] = \text{ALPHABET}[\text{Remainder}] \\
\quad \text{Value} = \left\lfloor \dfrac{\text{Value}}{62} \right\rfloor
\end{array}
$$

The loop fills from the last position backward. Any value that does not require all 6 digits is naturally zero-padded at the front — no explicit padding logic is needed.

### 5. Final Assembly

Append the 6-character checksum directly to the end of the base string with no separator:

$$
\text{Token} = \text{BaseString} + \text{TailChecksum}
$$

*Example*: `odc_prod_msk_7xT2zP9qL4wK1mN8vV5cB3nA4VHrHM`

## 🛡️ Validation Pipeline

### 1. Structural Guard & Asymmetric Slice

At every network edge gateway or application entry point, perform hard physical boundary assertions before any business logic runs.

Length check:

$$
\text{MinLength} = \text{len}(\text{Prefix}) + 24 + 6
$$

If the validator holds a known expected `Prefix`, the token length must equal exactly $\text{len}(\text{Prefix}) + 30$. Without a known prefix, the absolute minimum token length is 36 characters (each of the three prefix components must be at least 1 character, so the shortest valid prefix is 6 characters). Any token shorter than the derived threshold is discarded immediately.

Atomic slice, ignoring internal underscore structure, using the fixed-width checksum cursor:

$$
\begin{matrix}
\text{BaseString} = \text{Token}[0:\text{len}(\text{Token}) - 6] \\
\text{ProvidedTailChecksum} = \text{Token}[\text{len}(\text{Token}) - 6:\text{len}(\text{Token})]
\end{matrix}
$$

### 2. Idempotent Verification

Re-execute the step 4 mapping of the generation pipeline locally on the extracted `BaseString` to obtain `ExpectedTailChecksum`. If `ExpectedTailChecksum ≠ ProvidedTailChecksum`, the token is rejected immediately as corrupted or truncated — fail-fast, no further processing.

### 3. Context Reification

Only after passing step 2 may the system split the prefix portion of `BaseString` on `_`. Because `PREFIX_CHARSET` forbids underscores within any component, the split result is uniquely deterministic across every platform and encoding. Extract the system, environment, and domain purpose identifiers and inject them as a typed security context object into downstream operations.

## ⚖️ Why 24 Characters

On the cryptographic side, 24 Base62 characters carry:

$$
\log_2\left(62^{24}\right) = 24 \log_2 62 \approx 142.9 \text{ bits}
$$

The practical security floor for API tokens in cloud-native architecture is 128 bits. 24 characters delivers 143 bits — clearing the threshold with margin to spare.

Some vendors push further: GitHub's personal access tokens use 30 entropy characters, reaching ~178 bits. From a pure cryptographic standpoint, that number is unimpeachable. From an engineering leverage standpoint, it is unnecessary — beyond ~140 bits, the marginal security return per additional character converges to zero. The cost, however, is real. Every extra character widens network payloads, inflates database index pages, and — on mobile — turns a token into a string too long to select cleanly with a long-press.

Tokenforge locks high-intensity entropy at 24 characters: the precise point where cryptographic surplus meets transmission efficiency and human ergonomics, with nothing wasted on either side.
18 changes: 18 additions & 0 deletions go/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Changelog

All notable changes to this project will be documented in this file.

# [unreleased]
### Features

* **go:** implement token generation and validation module ([42a2e00](https://github.com/leoweyr/tokenforge/commit/42a2e00d9419792fbc719d8e5b53cb0392b4009d)) [@leoweyr](https://github.com/leoweyr)


### Refactor

* **go:** align module path with `go/` subdirectory location in repository ([f507e51](https://github.com/leoweyr/tokenforge/commit/f507e51324b143d75d962eeeb02cb7c29e580331)) [@leoweyr](https://github.com/leoweyr)
* batch CSPRNG reads and name components in validation errors ([bd322ad](https://github.com/leoweyr/tokenforge/commit/bd322adce4beb2ace1d1ef5fa9ad9121f43286ac)) [@leoweyr](https://github.com/leoweyr)



<!-- Generated by git-cliff. -->
63 changes: 63 additions & 0 deletions go/forge.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package tokenforge

import (
"go.leoweyr.com/tokenforge/go/internal/checksum"
"go.leoweyr.com/tokenforge/go/internal/encoding"
"go.leoweyr.com/tokenforge/go/internal/entropy"
"go.leoweyr.com/tokenforge/go/internal/token"
)

// Forge is the public entry point of the Tokenforge module. It holds one token
// generator and one token validator and exposes them through the Generate and
// Validate methods. Build instances with NewForge.
type Forge struct {
// generator is the pipeline that Generate delegates to.
generator *token.TokenGenerator
// validator is the pipeline that Validate delegates to.
validator *token.TokenValidator
}

// NewForge assembles a ready-to-use Forge. Two alphabets are created first (Base62
// and prefix); the Base62 alphabet feeds both the checksum encoder and the entropy
// generator, while the prefix alphabet and the checksum calculator are shared by
// the generator and the validator.
func NewForge() *Forge {
	base62 := encoding.NewAlphabet(encoding.Base62Characters)
	prefix := encoding.NewAlphabet(encoding.PrefixCharacters)

	// One calculator instance serves both pipelines.
	crc := checksum.NewCrc32Calculator(encoding.NewBase62Encoder(base62))

	return &Forge{
		generator: token.NewTokenGenerator(prefix, entropy.NewSecureGenerator(base62), crc),
		validator: token.NewTokenValidator(prefix, crc),
	}
}

// Generate creates a token for the given system, environment, and domain purpose
// identifiers by delegating to the underlying token generator. On success it
// returns the token rendered as a string; on failure it returns the empty string
// together with the generator's error.
func (forge *Forge) Generate(systemIdentifier string, environmentIdentifier string, domainPurposeIdentifier string) (string, error) {
	result, err := forge.generator.Generate(systemIdentifier, environmentIdentifier, domainPurposeIdentifier)
	if err != nil {
		return "", err
	}

	return result.String(), nil
}

// Validate checks rawToken with the underlying token validator. When validation
// succeeds it returns a SecurityContext built from the token's system, environment,
// and domain purpose identifiers; otherwise it returns nil and the validator's
// error.
func (forge *Forge) Validate(rawToken string) (*SecurityContext, error) {
	parsed, err := forge.validator.Validate(rawToken)
	if err != nil {
		return nil, err
	}

	system := parsed.SystemIdentifier()
	environment := parsed.EnvironmentIdentifier()
	domainPurpose := parsed.DomainPurposeIdentifier()

	return newSecurityContext(system, environment, domainPurpose), nil
}
3 changes: 3 additions & 0 deletions go/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module go.leoweyr.com/tokenforge/go

go 1.26.2
32 changes: 32 additions & 0 deletions go/internal/checksum/calculator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package checksum

import (
"hash/crc32"

"go.leoweyr.com/tokenforge/go/internal/encoding"
)

// Calculator maps a token base string to its fixed-width Base62 tail checksum.
// Crc32Calculator in this package provides a method with this signature.
type Calculator interface {
// Calculate returns the checksum derived from baseString.
Calculate(baseString string) *Checksum
}

// Crc32Calculator derives the tail checksum from the CRC32-IEEE value of a base
// string encoded as Base62. Build instances with NewCrc32Calculator.
type Crc32Calculator struct {
// encoder renders the 32-bit CRC value as Base62 text.
encoder *encoding.Base62Encoder
}

// NewCrc32Calculator returns a Crc32Calculator that renders its checksums with the
// supplied Base62 encoder.
func NewCrc32Calculator(encoder *encoding.Base62Encoder) *Crc32Calculator {
	calculator := new(Crc32Calculator)
	calculator.encoder = encoder

	return calculator
}

// Calculate returns the tail checksum of baseString. The string's bytes are hashed
// with CRC32-IEEE, and the resulting 32-bit value is passed to the Base62 encoder
// together with the package-level Length to produce the checksum text.
func (crc32Calculator *Crc32Calculator) Calculate(baseString string) *Checksum {
	crcValue := crc32.ChecksumIEEE([]byte(baseString))

	return NewChecksum(crc32Calculator.encoder.Encode(crcValue, Length))
}
Loading
Loading