-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdemo.py
More file actions
127 lines (106 loc) · 5.35 KB
/
demo.py
File metadata and controls
127 lines (106 loc) · 5.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python3
"""
demo.py -- NSG Quick-Start Demo
One command to demonstrate the entire Nested Semantic Graph project.
Imports from nst_core.py.
Usage: python demo.py
"""
import sys, time, json, os
from nst_core import (NestedSemanticTree, build_query_tree, build_doc,
match_pipeline, rank_results, detect_clusters,
tree_edit_distance, NODE_CATEGORIES, SURFACE_FORMS)
def section(title: str, width: int = 60) -> None:
    """Print *title* as a banner framed by two rules of ``=`` characters.

    A blank line is emitted before the top rule so consecutive sections are
    visually separated in the demo output.

    Args:
        title: Text shown between the two rules, prefixed by one space.
        width: Number of ``=`` characters in each rule. Defaults to 60.
    """
    rule = "=" * width  # built once, used for both the top and bottom rule
    print(f"\n{rule}")
    print(f" {title}")
    print(rule)
def _leaf_distance_matrix(tree):
    """Return the pairwise ultrametric distances between *tree*'s leaves.

    Leaves are taken in sorted-id order; ``matrix[i][j]`` is the distance
    reported by ``tree.ultrametric_distance`` for leaf ``i`` and leaf ``j``.
    """
    leaves = sorted(tree.get_leaf_ids())
    return [[tree.ultrametric_distance(a, b) for b in leaves] for a in leaves]


def _same_matrix(ref, other, tol=1e-10):
    """Return True when both matrices have the same shape and equal entries.

    Entries are compared with an absolute tolerance of *tol*. A shape
    mismatch is reported as "not equal" instead of raising IndexError or
    silently ignoring the extra rows/columns.
    """
    if len(ref) != len(other):
        return False
    return all(
        len(ref_row) == len(row)
        and all(abs(x - y) < tol for x, y in zip(ref_row, row))
        for ref_row, row in zip(ref, other)
    )


def main():
    """Run the NSG demo end to end and return a process exit code.

    The demo prints six sections (T1-T6) followed by a summary: building a
    query tree, checking the per-language query trees against the English
    one, building a five-document corpus and ranking it, repeating the search
    with queries built for several languages, showing the edit-distance
    fallback, and timing ``match_pipeline``.

    Returns:
        0 when every verification reported in the summary passed, 1 when any
        ultrametric, isomorphism or cross-linguistic check failed.
    """
    languages = ["English", "Turkish", "Mohawk", "Finnish", "Inuktitut"]

    section("NSG — NESTED SEMANTIC GRAPH DEMO")
    print(" Shared library: nst_core.py")
    print(f" Node categories: {len(NODE_CATEGORIES)} ({', '.join(sorted(NODE_CATEGORIES))})")
    print(f" Languages: {len(SURFACE_FORMS)} ({', '.join(sorted(SURFACE_FORMS.keys()))})")

    # --- T1: Build trees ---
    section("T1: Build Nested Semantic Trees")
    print(" English: 'The dog bit the man yesterday'")
    en = build_query_tree("English")
    en.print_tree()
    # verify_ultrametric() returns a pair; the demo treats a second element
    # of 0 as a pass (presumably a violation count -- confirm in nst_core).
    triples, violations = en.verify_ultrametric()
    all_ultrametric = violations == 0
    print(f" Ultrametric: {'PASS' if violations == 0 else 'FAIL'} ({triples} triples)")

    # --- T2: Language neutrality ---
    section("T2: Language Neutrality")
    trees = {}
    for lang in languages:
        trees[lang] = build_query_tree(lang)
        _, violations = trees[lang].verify_ultrametric()
        passed = violations == 0
        all_ultrametric = all_ultrametric and passed
        print(f" {lang:<12}: {len(trees[lang].nodes)} nodes, ultrametric={'PASS' if passed else 'FAIL'}")

    # Verify isomorphism: every language's leaf-distance matrix must equal the
    # English one. Both matrices are built by the same helper so the index
    # order is identical, and differing leaf counts count as a mismatch.
    ref_matrix = _leaf_distance_matrix(trees["English"])
    all_isomorphic = True
    for lang, tree in trees.items():
        iso = _same_matrix(ref_matrix, _leaf_distance_matrix(tree))
        all_isomorphic = all_isomorphic and iso
        print(f" {'ISO' if iso else 'DIFF'} {lang}")

    # --- T3: Corpus + Search ---
    section("T3: Build Corpus & Search")
    # Each node tuple is passed straight to build_doc; the fields look like
    # (node id, label, category, parent id or None, numeric value) -- the
    # meaning of the last field is defined in nst_core, not here.
    corpus = [
        build_doc("Doc1: Dog bit man yesterday [exact]", "English", [
            ("b1", "BITE", "ACTION", None, 4), ("d1", "dog", "ENTITY", "b1", 0),
            ("m1", "man", "ENTITY", "b1", 0), ("p1", "PAST", "TENSE", "b1", 2),
            ("y1", "YESTERDAY", "LOCATIVE", "p1", 0)]),
        build_doc("Doc2: Dog chased cat [diff action]", "English", [
            ("c2", "CHASE", "ACTION", None, 4), ("d2", "dog", "ENTITY", "c2", 0),
            ("cat", "cat", "ENTITY", "c2", 0), ("p2", "PAST", "TENSE", "c2", 2),
            ("y2", "YESTERDAY", "LOCATIVE", "p2", 0)]),
        build_doc("Doc3: Man bit dog [reversed]", "English", [
            ("b3", "BITE", "ACTION", None, 4), ("m3", "man", "ENTITY", "b3", 0),
            ("d3", "dog", "ENTITY", "b3", 0)]),
        build_doc("Doc4: Cat eats fish [different]", "English", [
            ("e4", "EAT", "ACTION", None, 4), ("c4", "cat", "ENTITY", "e4", 0),
            ("f4", "fish", "ENTITY", "e4", 0), ("n4", "PRESENT", "TENSE", "e4", 2)]),
        build_doc("Doc5: Dog bit man in park [extended]", "English", [
            ("b5", "BITE", "ACTION", None, 5), ("d5", "dog", "ENTITY", "b5", 0),
            ("m5", "man", "ENTITY", "b5", 0), ("p5", "PAST", "TENSE", "b5", 2),
            ("y5", "YESTERDAY", "LOCATIVE", "p5", 0), ("pk", "park", "LOCATIVE", "b5", 0)]),
    ]
    corpus_ultrametric = all(doc.verify_ultrametric()[1] == 0 for doc in corpus)
    all_ultrametric = all_ultrametric and corpus_ultrametric
    print(f" Corpus: {len(corpus)} docs, all ultrametric={'PASS' if corpus_ultrametric else 'FAIL'}")

    ranked = rank_results(en, corpus)
    print("\n Search: 'dog bit man yesterday'")
    # Each ranked entry is a 6-tuple; only name, match type and score are shown.
    for position, (_idx, name, match_type, _cov, _ed, score) in enumerate(ranked, start=1):
        print(f" {position}. [{match_type}] {score:.4f} {name}")
    cuts = detect_clusters(ranked)
    if cuts:
        print(f"\n Cluster boundaries after ranks: {cuts}")

    # --- T4: Cross-linguistic search ---
    section("T4: Cross-Linguistic Search")
    top_docs = []  # top-ranked document name per language, None if no result
    for lang in ["English", "Turkish", "Mohawk"]:
        results = rank_results(build_query_tree(lang), corpus)
        if results:
            _idx, name, match_type, _cov, _ed, score = results[0]
            top_docs.append(name)
            print(f" {lang:<12}: top = {name[:50]} ({match_type}, {score:.4f})")
        else:
            top_docs.append(None)
    same_top = None not in top_docs and len(set(top_docs)) == 1

    # --- T5: Type III fallback ---
    section("T5: Type III Edit Distance Fallback")
    cat_fish = build_doc("Cat eats fish", "English", [
        ("e", "EAT", "ACTION", None, 4), ("c", "cat", "ENTITY", "e", 0),
        ("f", "fish", "ENTITY", "e", 0)])
    dist = tree_edit_distance(en, cat_fish)
    print(" Query 'dog bit man' vs. Doc 'cat eats fish'")
    print(f" Edit distance: {dist:.0f} (relabel EAT->BITE, cat->dog, fish->man...)")
    # NOTE(review): 4*5 is a hard-coded normaliser whose derivation is not
    # visible in this file -- confirm it matches nst_core's scoring.
    print(f" Score: {max(0.0, 1.0 - dist/(4*5)):.4f}")

    # --- T6: Performance ---
    section("T6: Performance")
    repeats = 10
    for size in [5, 20]:
        # Repeat/truncate the corpus so that it holds exactly `size` documents.
        sub = corpus * (size // len(corpus)) + corpus[:size % len(corpus)]
        # perf_counter is monotonic and high-resolution, so the elapsed time
        # needs no artificial lower bound to avoid a zero reading.
        start = time.perf_counter()
        for _ in range(repeats):
            for doc in sub:
                match_pipeline(en, doc)
        elapsed = time.perf_counter() - start
        ms = elapsed / (repeats * len(sub)) * 1000
        print(f" {len(sub):<3} docs: {ms:.3f} ms/query")

    # --- Summary ---
    # Report what the checks above actually found instead of fixed claims.
    section("SUMMARY")
    print(f" Trees: {1 + len(corpus)} (1 query + {len(corpus)} corpus)")
    print(f" Languages: {', '.join(languages)}")
    print(" Matching: Type I (exact), Type II (partial), Type III (edit)")
    if all_ultrametric and all_isomorphic:
        print(" Verification: All trees ultrametric, all languages isomorphic")
    else:
        print(
            " Verification: FAILED"
            f" (ultrametric={'PASS' if all_ultrametric else 'FAIL'},"
            f" isomorphic={'PASS' if all_isomorphic else 'FAIL'})"
        )
    if same_top:
        print(" Cross-linguistic search: ALL languages match same top document")
    else:
        print(" Cross-linguistic search: languages DISAGREE on top document")
    print("\n >> DEMO COMPLETE <<")
    return 0 if (all_ultrametric and all_isomorphic and same_top) else 1
# Script entry point: run the demo and hand its return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())