Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
ee7c58f
dedup_key: add tableShareKey for post-embed smallTable identity
sayrer May 30, 2026
c6d4ef1
epsi_closure: dedup via tableShareKey instead of *smallTable
sayrer May 30, 2026
2559315
nfa: embed smallTable into faState by value
sayrer May 30, 2026
b8d2149
tests: recalibrate size assertions after embedding smallTable
sayrer May 30, 2026
69415ce
epsi_closure: pool buffers, restore two-counter dedup
sayrer May 31, 2026
3242841
state_lists: dedup intern() via sort+compact, drop the seen map
sayrer May 31, 2026
885141d
Merge remote-tracking branch 'origin/main' into embed-smalltable
sayrer May 31, 2026
edc66b7
kaizen: add research mainline which generates CSV data
timbray May 31, 2026
5fe237b
docs: spec for incremental epsilon closure via walk pruning
sayrer May 31, 2026
534327a
docs: implementation plan for incremental epsilon closure
sayrer May 31, 2026
18547f6
docs: record epsilon/step immutability verification for closure prune
sayrer May 31, 2026
a6c44a9
test: order-independence guard for incremental epsilon closure
sayrer May 31, 2026
20c980d
research: add -cpuprofile flag for profiling the harness
sayrer May 31, 2026
5976a4f
epsi_closure: prune closure walk at already-closed states
sayrer May 31, 2026
f96e39a
epsi_closure: address review — reset walk counter, clarify comments
sayrer May 31, 2026
00251fc
docs: spec for self-only epsilon-closure sentinel
sayrer Jun 1, 2026
48844f5
docs: implementation plan for self-only closure sentinel
sayrer Jun 1, 2026
e5b1370
nfa: add len==0 self-only discriminator to closure consumers
sayrer Jun 1, 2026
033aacb
epsi_closure: store {self} closures as a zero-alloc sentinel
sayrer Jun 1, 2026
ebd46bd
epsi_closure: document closure sentinel encoding; fix stale test guards
sayrer Jun 1, 2026
8c8446a
docs: record self-only sentinel benchmark results
sayrer Jun 1, 2026
d855451
Merge branch 'main' into embed-smalltable
sayrer Jun 4, 2026
c2b6bbf
Resolve CPU profile merge conflict.
sayrer Jun 4, 2026
90062c4
gofmt: fix struct field alignment in faState
sayrer Jun 5, 2026
0049695
epsi_closure: document closureGen/closureRep tableMark fields
sayrer Jun 7, 2026
28f1ff7
dedup_key: drop redundant stepsLen from tableShareKey
sayrer Jun 7, 2026
03c455e
memory_cost: drop impossible nil guard on epsilons walk
sayrer Jun 7, 2026
83492f8
epsi_closure: clarify why the buffer pool uses sync.Pool
sayrer Jun 7, 2026
a63957e
value_matcher: rename vmFields.startState to start
sayrer Jun 7, 2026
2d472e8
epsi_closure: add high-level overview of closure construction
sayrer Jun 7, 2026
f088b2b
nfa: fix traverseNFA start-closure comment rationale
sayrer Jun 7, 2026
a43a651
epsi_closure: note the post-dedup self-only branch is an uncovered op…
sayrer Jun 7, 2026
fcce28d
docs: remove superpowers plan/spec design docs
sayrer Jun 8, 2026
3b6e169
test: rewrite TestEpsilonClosureRequired with interior-spinner a*z
sayrer Jun 8, 2026
f234c4a
nfa: pass faStates to the value-matcher start merges
sayrer Jun 8, 2026
76b2051
nfa: fix stale nfa2Dfa start-closure comment
sayrer Jun 8, 2026
36486fd
epsi_closure: drop sync.Pool for a matcher-owned closureBuffers clear…
sayrer Jun 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions anything_but.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,12 @@ func readAnythingButSpecial(pb *patternBuild, valsIn []typedVal) (pathVals []typ
// Making a succession of anything-but automata for each of "a" and "b" and then merging them turns out not
// to work because what the caller means is really an AND - everything that matches neither "a" nor "b". So
// in principle we could intersect automata.
func makeMultiAnythingButFA(vals [][]byte) (*smallTable, *fieldMatcher) {
func makeMultiAnythingButFA(vals [][]byte) (*faState, *fieldMatcher) {
nextField := newFieldMatcher()
success := &faState{table: newSmallTable(), fieldTransitions: []*fieldMatcher{nextField}}

ret, _ := makeOneMultiAnythingButStep(vals, 0, success), nextField
return ret, nextField
startTable := makeOneMultiAnythingButStep(vals, 0, success)
return &faState{table: startTable}, nextField
}

// makeOneMultiAnythingButStep - spookeh. The idea is that there will be N smallTables in this FA, where N is
Expand All @@ -84,7 +84,7 @@ func makeMultiAnythingButFA(vals [][]byte) (*smallTable, *fieldMatcher) {
// yet been exhausted. We notice when we get to the end of each val and put in a valueTerminator transition
// to a step with no nextField entry, i.e. failure because we've exactly matched one of the anything-but
// strings.
func makeOneMultiAnythingButStep(vals [][]byte, index int, success *faState) *smallTable {
func makeOneMultiAnythingButStep(vals [][]byte, index int, success *faState) smallTable {
// this will be the default transition in all the anything-but tables.
var u unpackedTable
for i := range u {
Expand Down
13 changes: 11 additions & 2 deletions core_matcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ import (
type coreMatcher struct {
// updateable holds the current coreFields snapshot behind an atomic pointer.
// A pattern build assembles a fresh coreFields and switches it in once the
// build is complete.
updateable atomic.Pointer[coreFields]
// lock is taken by addPatternWithPrinter and held until that call returns.
lock sync.Mutex
// closureBufs is scratch for epsilon-closure computation, reused across
// every AddPattern. addPattern holds lock for the whole build, so it is
// never accessed concurrently. Lives here (not a sync.Pool) so the maps are
// never evicted mid-build; see epsilonClosureInto.
closureBufs *closureBuffers
}

// coreFields groups the updateable fields in coreMatcher.
Expand All @@ -38,7 +43,7 @@ type coreFields struct {
}

func newCoreMatcher() *coreMatcher {
m := coreMatcher{}
m := coreMatcher{closureBufs: newClosureBuffers()}
m.updateable.Store(&coreFields{
state: newFieldMatcher(),
segmentsTree: newSegmentsIndex(),
Expand Down Expand Up @@ -71,6 +76,10 @@ func (m *coreMatcher) addPatternWithPrinter(x X, patternJSON string, printer pri
m.lock.Lock()
defer m.lock.Unlock()

// Reuse the closure scratch but empty it each build so its maps hold only
// this pattern's working set, not every state in the matcher.
m.closureBufs.reset()

// we build up the new coreMatcher state in freshStart so that we can atomically switch it in once complete

freshStart := &coreFields{}
Expand Down Expand Up @@ -105,7 +114,7 @@ func (m *coreMatcher) addPatternWithPrinter(x X, patternJSON string, printer pri
case existsFalseType:
ns = state.addExists(false, field)
default:
ns = state.addTransition(field, printer)
ns = state.addTransition(field, printer, m.closureBufs)
}
nextStates = append(nextStates, ns...)
}
Expand Down
4 changes: 2 additions & 2 deletions core_matcher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,8 +330,8 @@ func TestSimpleaddPattern(t *testing.T) {
// which means the finite automata are hidden deep inside the coreMatcher instance
// and hard to get at. This helper routine fetches the value-matcher automaton
// corresponding to the "path" argument
func fetchFAForPath(t *testing.T, cm *coreMatcher, path string) *smallTable {
func fetchFAForPath(t *testing.T, cm *coreMatcher, path string) *faState {
t.Helper()
vm := cm.fields().state.fields().transitions[path]
return vm.fields().startTable
return vm.fields().start
}
31 changes: 31 additions & 0 deletions dedup_key.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package quamina

import "unsafe"

// tableShareKey returns a stable identifier for a smallTable's "share group".
// Two states whose smallTables hold slice-headers pointing at the same `steps`
// backing array (which is what happens when one smallTable struct value is
// copied into multiple faStates during construction) will produce equal
// keys. This replaces *smallTable-pointer identity as the dedup key in
// epsilon-closure computation after smallTable is embedded into faState
// by value.
//
// The key is just the steps backing-array pointer: share groups are only ever
// born by copying a whole steps slice-header (see the spinner merges in
// nfa.go), so two tables that share the data pointer always share the length
// too — nothing in the package reslices steps. Pointer identity is therefore
// sufficient to identify a share group; carrying the length as well would
// never break a tie the pointer didn't already break.
//
// A zero key (nil pointer) means "no share group" — used for tables with no
// byte transitions. Callers that want to dedup such tables should skip the
// zero key.
type tableShareKey struct {
stepsData unsafe.Pointer
}

func newTableShareKey(t *smallTable) tableShareKey {
return tableShareKey{
stepsData: unsafe.Pointer(unsafe.SliceData(t.steps)),
}
Comment thread
sayrer marked this conversation as resolved.
}
57 changes: 57 additions & 0 deletions dedup_key_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package quamina

import (
"testing"
)

func TestTableShareKey_SharedBackings(t *testing.T) {
// Construct one smallTable, value-copy it (simulating post-embed share).
src := smallTable{
ceilings: []byte{'a', 'b', byte(byteCeiling)},
steps: []*faState{nil, nil, nil},
}
copy1 := src
copy2 := src
if newTableShareKey(&copy1) != newTableShareKey(&copy2) {
Comment thread
sayrer marked this conversation as resolved.
t.Errorf("value-copied tables should share key; got %v vs %v",
newTableShareKey(&copy1), newTableShareKey(&copy2))
}
}

// TestTableShareKey_DistinctBackings checks that two tables with identical
// contents but separately allocated steps arrays do not collide on their
// share key.
func TestTableShareKey_DistinctBackings(t *testing.T) {
	// Each call allocates its own ceilings and steps backing arrays.
	build := func() smallTable {
		return smallTable{
			ceilings: []byte{'a', byte(byteCeiling)},
			steps:    []*faState{nil, nil},
		}
	}
	first, second := build(), build()

	firstKey, secondKey := newTableShareKey(&first), newTableShareKey(&second)
	if firstKey == secondKey {
		t.Errorf("independently-built tables should not share key")
	}
}

// TestTableShareKey_AppendBreaksShare checks that a value copy stops sharing
// its key with the source once appends force the copy's steps slice onto a
// new backing array. The source starts with capacity 1 and is already full,
// so the first append to the copy has to reallocate.
func TestTableShareKey_AppendBreaksShare(t *testing.T) {
	base := smallTable{
		ceilings: append(make([]byte, 0, 1), byte(byteCeiling)),
		steps:    append(make([]*faState, 0, 1), nil),
	}

	grown := base
	// Eight appends onto a full cap=1 slice: the steps backing array moves at
	// least once.
	for n := byte(0); n < 8; n++ {
		grown.steps = append(grown.steps, nil)
		grown.ceilings = append(grown.ceilings, n)
	}

	baseKey := newTableShareKey(&base)
	if baseKey == newTableShareKey(&grown) {
		t.Errorf("expected keys to diverge after append-with-realloc; got equal: %v",
			baseKey)
	}
}
Loading
Loading