# Source: cocapn-core/cocapn/validation_loop.py (SuperInstance/cocapn-core, branch main)
"""
Validation Loop for cocapn-core v3.1

Tiles can include optional `assertions` that are testable. The ValidationLoop
verifies these assertions and tracks tile quality scores.

Example tile with assertion:
    {
        "question": "What is priority inversion?",
        "answer": "Priority inversion occurs when...",
        "domain": "rtos_design",
        "assertions": [
            {"type": "contains", "pattern": "priority inheritance", "description": "Must mention PIP"},
            {"type": "contains", "pattern": "bounded", "description": "Must mention bounded wait"},
            {"type": "min_length", "value": 100, "description": "Answer must be substantive"}
        ]
    }

Assertion types:
- contains: answer or question must contain `pattern` (case-insensitive substring)
- not_contains: answer must NOT contain `pattern` (case-insensitive substring)
- min_length: answer must be at least `value` characters (default 50)
- max_length: answer must be at most `value` characters (default 5000)
- domain_match: domain must match the regex `pattern` (anchored at the start, via re.match)
- references: answer must mention at least `min` (default 1) of the listed `terms`
- no_hallucination_markers: answer must not contain any phrase in HALLUCINATION_MARKERS
"""
import re
import time
import json
from typing import List, Dict, Any, Optional, Tuple
from dataclasses import dataclass, field
from enum import Enum


class AssertionType(Enum):
    """Closed set of assertion kinds that a tile assertion may declare.

    Each member's value is the string expected in an assertion's ``"type"``
    field.
    """

    # Substring checks.
    CONTAINS = "contains"
    NOT_CONTAINS = "not_contains"

    # Bounds on the answer's character count.
    MIN_LENGTH = "min_length"
    MAX_LENGTH = "max_length"

    # Regex check on the tile's domain.
    DOMAIN_MATCH = "domain_match"

    # Minimum number of listed terms that must be mentioned.
    REFERENCES = "references"

    # Absence of the phrases listed in HALLUCINATION_MARKERS.
    NO_HALLUCINATION = "no_hallucination_markers"


# Common hallucination markers — phrases LLMs generate when they're unsure.
# Entries are matched as plain substrings against the lower-cased answer (see
# AssertionValidator._check_hallucination), so every entry must itself be
# lower-case.
# NOTE(review): generic entries such as "in general" can also match ordinary
# prose — confirm that this level of strictness is intended.
HALLUCINATION_MARKERS = [
    "it is worth noting that",
    "it's important to note",
    "as a language model",
    "i don't have personal",
    "as an ai",
    "i'm not sure",
    "it's difficult to",
    "the exact answer depends",
    "there is no definitive",
    "it's important to remember",
    "it should be noted",
    "in general",
    "broadly speaking",
]


@dataclass
class AssertionResult:
    """Outcome of checking one assertion against one tile."""

    # Value of the assertion's "type" field (e.g. "contains").
    assertion_type: str
    # True when the tile satisfied the assertion.
    passed: bool
    # Human-readable label for the assertion.
    description: str
    # Explanation of the outcome (what was found or measured).
    detail: str = ""
    # Signed amount this result adds to the tile's quality score.
    score_impact: float = 0.0


@dataclass
class TileValidation:
    """Aggregated result of validating one tile against a set of assertions."""

    # Identity of the validated tile.
    tile_id: str
    domain: str
    agent: str

    # How many assertions passed, out of how many were evaluated.
    assertions_passed: int = 0
    assertions_total: int = 0

    quality_score: float = 0.0  # 0.0 to 1.0

    # One AssertionResult per evaluated assertion, in evaluation order.
    results: List[AssertionResult] = field(default_factory=list)

    # Epoch timestamp (time.time()) taken when this record is created.
    validated_at: float = field(default_factory=time.time)

    # One human-readable message per failed assertion.
    warnings: List[str] = field(default_factory=list)


class AssertionValidator:
    """Validates individual assertions against tile data.

    An assertion is a dict whose ``"type"`` selects one of the ``_check_*``
    handlers below. :meth:`validate` is the only public entry point; it turns
    unknown types and any exception raised by a handler into a failed
    :class:`AssertionResult` with a zero score impact.

    Tile text fields (``answer``, ``question``, ``domain``) that are missing or
    ``None`` are treated as the empty string, so a tile without an answer is
    scored by the assertions instead of producing a validation error.
    """

    # Assertion type -> handler method name. Names are resolved with getattr
    # at call time so that subclasses can override individual handlers.
    _HANDLERS = {
        AssertionType.CONTAINS.value: "_check_contains",
        AssertionType.NOT_CONTAINS.value: "_check_not_contains",
        AssertionType.MIN_LENGTH.value: "_check_min_length",
        AssertionType.MAX_LENGTH.value: "_check_max_length",
        AssertionType.DOMAIN_MATCH.value: "_check_domain_match",
        AssertionType.REFERENCES.value: "_check_references",
        AssertionType.NO_HALLUCINATION.value: "_check_hallucination",
    }

    def validate(self, tile: Dict[str, Any], assertion: Dict[str, Any]) -> AssertionResult:
        """Check one assertion against one tile.

        Args:
            tile: Tile data; handlers read ``answer``, ``question`` and
                ``domain``.
            assertion: Assertion dict with a ``type`` and type-specific keys.

        Returns:
            The handler's result, or a failed result (score impact 0.0) when
            the type is unknown or the handler raises.
        """
        atype = assertion.get("type", "")
        description = assertion.get("description", atype)

        try:
            handler_name = self._HANDLERS.get(atype)
            if handler_name is None:
                return AssertionResult(atype, False, description, f"Unknown assertion type: {atype}")
            return getattr(self, handler_name)(tile, assertion)
        except Exception as e:
            # Boundary: a malformed assertion must not abort the whole run.
            return AssertionResult(atype, False, description, f"Validation error: {e}")

    @staticmethod
    def _text(tile: Dict[str, Any], key: str) -> str:
        """Return ``tile[key]`` as a string; missing or None becomes ``""``."""
        value = tile.get(key)
        if value is None:
            return ""
        return value if isinstance(value, str) else str(value)

    @staticmethod
    def _require_pattern(assertion: Dict[str, Any]) -> str:
        """Return the assertion's ``pattern``, rejecting a missing/empty one.

        An empty pattern is a substring of every string, so ``contains`` would
        pass vacuously and ``not_contains`` could never pass.

        Raises:
            ValueError: if ``pattern`` is absent, empty, or not a string.
        """
        pattern = assertion.get("pattern")
        if not isinstance(pattern, str) or not pattern:
            raise ValueError("assertion requires a non-empty string 'pattern'")
        return pattern

    def _check_contains(self, tile: Dict, assertion: Dict) -> AssertionResult:
        """Pass when ``pattern`` occurs (case-insensitively) in answer or question.

        Score impact: +0.1 on pass, -0.2 on failure.
        """
        pattern = self._require_pattern(assertion)
        haystack = (self._text(tile, "answer") + " " + self._text(tile, "question")).lower()
        passed = pattern.lower() in haystack
        return AssertionResult(
            "contains", passed, assertion.get("description", f"Contains '{pattern}'"),
            f"Pattern '{pattern}' {'found' if passed else 'not found'}",
            0.1 if passed else -0.2
        )

    def _check_not_contains(self, tile: Dict, assertion: Dict) -> AssertionResult:
        """Pass when ``pattern`` does not occur (case-insensitively) in the answer.

        Score impact: +0.05 on pass, -0.3 on failure.
        """
        pattern = self._require_pattern(assertion)
        answer = self._text(tile, "answer").lower()
        passed = pattern.lower() not in answer
        return AssertionResult(
            "not_contains", passed, assertion.get("description", f"Not contains '{pattern}'"),
            f"Pattern '{pattern}' {'absent' if passed else 'found (violation)'}",
            0.05 if passed else -0.3
        )

    def _check_min_length(self, tile: Dict, assertion: Dict) -> AssertionResult:
        """Pass when the answer has at least ``value`` characters (default 50).

        Score impact: +0.05 on pass, -0.1 on failure.
        """
        min_len = assertion.get("value", 50)
        answer = self._text(tile, "answer")
        passed = len(answer) >= min_len
        return AssertionResult(
            "min_length", passed, assertion.get("description", f"Min length {min_len}"),
            f"Answer length: {len(answer)} (min: {min_len})",
            0.05 if passed else -0.1
        )

    def _check_max_length(self, tile: Dict, assertion: Dict) -> AssertionResult:
        """Pass when the answer has at most ``value`` characters (default 5000).

        Score impact: +0.02 on pass, -0.15 on failure.
        """
        max_len = assertion.get("value", 5000)
        answer = self._text(tile, "answer")
        passed = len(answer) <= max_len
        return AssertionResult(
            "max_length", passed, assertion.get("description", f"Max length {max_len}"),
            f"Answer length: {len(answer)} (max: {max_len})",
            0.02 if passed else -0.15
        )

    def _check_domain_match(self, tile: Dict, assertion: Dict) -> AssertionResult:
        """Pass when the tile's domain matches the regex ``pattern`` (default ``.*``).

        Uses ``re.match``, so the pattern is anchored at the start of the
        domain but not at its end. Score impact: +0.05 on pass, -0.2 on failure.
        """
        pattern = assertion.get("pattern", ".*")
        domain = self._text(tile, "domain")
        passed = bool(re.match(pattern, domain))
        return AssertionResult(
            "domain_match", passed, assertion.get("description", f"Domain matches {pattern}"),
            f"Domain '{domain}' {'matches' if passed else 'does not match'} pattern '{pattern}'",
            0.05 if passed else -0.2
        )

    def _check_references(self, tile: Dict, assertion: Dict) -> AssertionResult:
        """Pass when the answer mentions at least ``min`` (default 1) of ``terms``.

        Matching is a case-insensitive substring test. Score impact on pass is
        0.1 scaled by the fraction of terms found; -0.15 on failure.
        """
        raw_terms = assertion.get("terms") or []
        # A bare string would otherwise be iterated character by character.
        terms = [raw_terms] if isinstance(raw_terms, str) else list(raw_terms)
        min_refs = assertion.get("min", 1)
        answer = self._text(tile, "answer").lower()
        found = sum(1 for t in terms if t.lower() in answer)
        passed = found >= min_refs
        return AssertionResult(
            "references", passed, assertion.get("description", f"References {min_refs}+ of {terms}"),
            f"Found {found}/{len(terms)} terms ({terms[:5]}{'...' if len(terms) > 5 else ''})",
            0.1 * (found / max(len(terms), 1)) if passed else -0.15
        )

    def _check_hallucination(self, tile: Dict, assertion: Dict) -> AssertionResult:
        """Pass when the answer contains none of ``HALLUCINATION_MARKERS``.

        Score impact: +0.05 on pass, -0.2 per distinct marker found on failure.
        """
        answer = self._text(tile, "answer").lower()
        found_markers = [m for m in HALLUCINATION_MARKERS if m in answer]
        passed = not found_markers
        detail = "No markers found" if passed else f"Found markers: {found_markers}"
        return AssertionResult(
            "no_hallucination_markers", passed,
            assertion.get("description", "No hallucination markers"),
            detail,
            0.05 if passed else -0.2 * len(found_markers)
        )


class ValidationLoop:
    """
    Validates tiles against assertions and computes quality scores.

    Every tile starts from ``BASE_SCORE``; each assertion result adds its
    signed ``score_impact`` and the total is clamped to [0.0, 1.0]. Outcomes
    are kept in memory on the instance so that per-domain and overall reports
    can be produced afterwards.

    ``fleet`` is stored on the instance for integration with cocapn-core's
    Fleet engine; this class does not otherwise use it.
    """

    # Score a tile has before any assertion impact is applied.
    BASE_SCORE = 0.5
    # Minimum quality score for a tile to count towards ``pass_rate``.
    PASS_THRESHOLD = 0.7

    def __init__(self, fleet=None):
        self.fleet = fleet
        self.validator = AssertionValidator()
        self._history: List[TileValidation] = []
        self._domain_scores: Dict[str, List[float]] = {}

    def validate_tile(
        self,
        tile: Dict[str, Any],
        assertions: Optional[List[Dict[str, Any]]] = None,
    ) -> TileValidation:
        """Validate a single tile and record the outcome.

        The assertions to run are chosen in this order:

        1. the ``assertions`` argument, when it is not None (an empty list
           means "run no assertions");
        2. the tile's own ``"assertions"`` list, when it holds at least one
           dict entry;
        3. the defaults from :meth:`_default_assertions`.

        Args:
            tile: Tile data. ``id`` (or ``provenance.original_id``), ``domain``
                and ``agent`` are read for bookkeeping.
            assertions: Explicit assertions overriding the tile's own.

        Returns:
            The TileValidation, which is also appended to the history and to
            the per-domain score list.
        """
        # Provenance may be absent, null, or not a mapping in incoming data.
        provenance = tile.get("provenance")
        fallback_id = "unknown"
        if isinstance(provenance, dict):
            fallback_id = provenance.get("original_id", "unknown")
        tile_id = tile.get("id", fallback_id)
        domain = tile.get("domain", "general")
        agent = tile.get("agent", "unknown")

        if assertions is None:
            assertions = self._tile_assertions(tile) or self._default_assertions(domain)

        results = [self.validator.validate(tile, assertion) for assertion in assertions]

        # Accumulate sequentially from the base score, then clamp.
        score = self.BASE_SCORE
        for result in results:
            score += result.score_impact
        score = max(0.0, min(1.0, score))

        validation = TileValidation(
            tile_id=tile_id,
            domain=domain,
            agent=agent,
            assertions_passed=sum(1 for r in results if r.passed),
            assertions_total=len(results),
            quality_score=score,
            results=results,
        )

        # One warning per failed assertion.
        validation.warnings.extend(
            f"[{r.assertion_type}] {r.description}: {r.detail}"
            for r in results
            if not r.passed
        )

        self._history.append(validation)
        self._domain_scores.setdefault(domain, []).append(score)

        return validation

    def validate_batch(
        self,
        tiles: List[Dict[str, Any]],
        domain_assertions: Optional[Dict[str, List[Dict]]] = None,
    ) -> List[TileValidation]:
        """Validate a batch of tiles.

        Args:
            tiles: Tiles to validate, in order.
            domain_assertions: Optional mapping from domain to the assertions
                to use for tiles of that domain. Domains without an entry fall
                back to the selection rules of :meth:`validate_tile`.

        Returns:
            One TileValidation per tile, in input order.
        """
        by_domain = domain_assertions or {}
        return [
            self.validate_tile(t, by_domain.get(t.get("domain", "general")))
            for t in tiles
        ]

    @staticmethod
    def _tile_assertions(tile: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Return the dict entries of the tile's own ``"assertions"`` list.

        Returns an empty list when the key is absent or is not a list;
        non-dict entries are dropped because the validator reads assertions
        with ``.get``.
        """
        embedded = tile.get("assertions")
        if not isinstance(embedded, list):
            return []
        return [a for a in embedded if isinstance(a, dict)]

    def _default_assertions(self, domain: str) -> List[Dict[str, Any]]:
        """Generate sensible default assertions for a tile.

        ``domain`` is accepted for future per-domain defaults and is currently
        unused.

        NOTE(review): with the score impacts currently defined in
        AssertionValidator (+0.05 each on pass), a tile validated with only
        these defaults tops out at 0.6, below ``PASS_THRESHOLD`` — confirm
        that this is intended.
        """
        return [
            {"type": "min_length", "value": 50, "description": "Answer must be substantive"},
            {"type": "no_hallucination_markers", "description": "No AI hedging phrases"},
        ]

    def domain_report(self, domain: str) -> Dict[str, Any]:
        """Quality report for a specific domain.

        Returns ``{"domain": domain, "status": "no_data"}`` when no tile of
        that domain has been validated; otherwise count, average, min, max,
        pass rate (share of scores >= ``PASS_THRESHOLD``) and bucket counts.
        """
        scores = self._domain_scores.get(domain, [])
        if not scores:
            return {"domain": domain, "status": "no_data"}

        count = len(scores)
        passing = sum(1 for s in scores if s >= self.PASS_THRESHOLD)
        return {
            "domain": domain,
            "tile_count": count,
            "avg_score": round(sum(scores) / count, 3),
            "min_score": round(min(scores), 3),
            "max_score": round(max(scores), 3),
            "pass_rate": round(passing / count, 3),
            "distribution": self._score_distribution(scores),
        }

    def overall_report(self) -> Dict[str, Any]:
        """Quality report across all validated domains.

        Returns ``{"status": "no_data"}`` when nothing has been validated.
        ``top_domains`` / ``bottom_domains`` are ``(domain, report)`` pairs
        sorted by average score, best first; with few domains the two lists
        overlap.
        """
        all_scores = [v.quality_score for v in self._history]
        if not all_scores:
            return {"status": "no_data"}

        domain_reports = {d: self.domain_report(d) for d in self._domain_scores}

        # Sort domains by quality, best first.
        sorted_domains = sorted(
            domain_reports.items(),
            key=lambda x: x[1].get("avg_score", 0),
            reverse=True,
        )

        total = len(all_scores)
        passing = sum(1 for s in all_scores if s >= self.PASS_THRESHOLD)
        return {
            "total_tiles": total,
            "avg_score": round(sum(all_scores) / total, 3),
            "pass_rate": round(passing / total, 3),
            "top_domains": sorted_domains[:10],
            "bottom_domains": sorted_domains[-5:],
            "domains_count": len(self._domain_scores),
        }

    def _score_distribution(self, scores: List[float]) -> Dict[str, int]:
        """Count tiles in score buckets.

        Buckets by lower bound: excellent >= 0.9, good >= 0.7,
        acceptable >= 0.5, poor >= 0.3, fail below that.
        """
        buckets = {"excellent": 0, "good": 0, "acceptable": 0, "poor": 0, "fail": 0}
        for s in scores:
            if s >= 0.9:
                buckets["excellent"] += 1
            elif s >= 0.7:
                buckets["good"] += 1
            elif s >= 0.5:
                buckets["acceptable"] += 1
            elif s >= 0.3:
                buckets["poor"] += 1
            else:
                buckets["fail"] += 1
        return buckets


# CLI for standalone validation
def main(argv: Optional[List[str]] = None) -> int:
    """Validate a JSONL file of tiles and print a summary report.

    Usage: ``python validation_loop.py <tiles.jsonl> [--domain <domain>]``

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        Process exit code: 0 on success (including "no tiles validated"),
        1 on a usage error or when the input file cannot be read.
    """
    import sys

    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        print("Usage: python validation_loop.py <tiles.jsonl> [--domain <domain>]")
        return 1

    tiles_file = argv[1]
    filter_domain = None
    if "--domain" in argv:
        idx = argv.index("--domain")
        # Guard against "--domain" being the last argument.
        if idx + 1 >= len(argv):
            print("Error: --domain requires a value", file=sys.stderr)
            return 1
        filter_domain = argv[idx + 1]

    loop = ValidationLoop()
    skipped = 0  # lines that are not a JSON object

    try:
        with open(tiles_file, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    tile = json.loads(line)
                except json.JSONDecodeError:
                    skipped += 1
                    continue
                if not isinstance(tile, dict):
                    skipped += 1
                    continue
                if filter_domain and tile.get("domain") != filter_domain:
                    continue
                result = loop.validate_tile(tile)
                if result.warnings:
                    print(f"  [{result.domain}] {result.tile_id}: score={result.quality_score:.2f} warnings={len(result.warnings)}")
    except OSError as e:
        print(f"Error: cannot read {tiles_file}: {e}", file=sys.stderr)
        return 1

    if skipped:
        print(f"Skipped {skipped} line(s) that were not valid JSON objects", file=sys.stderr)

    report = loop.overall_report()
    print("\n=== Validation Report ===")
    # overall_report() has no score keys when nothing was validated.
    if report.get("status") == "no_data":
        print("No tiles validated.")
        return 0

    print(f"Tiles: {report['total_tiles']}")
    print(f"Avg score: {report['avg_score']}")
    print(f"Pass rate: {report['pass_rate']}")
    if report.get("top_domains"):
        print("\nTop domains:")
        for d, r in report["top_domains"][:5]:
            print(f"  {d}: avg={r['avg_score']}, count={r['tile_count']}")
    return 0


if __name__ == "__main__":
    import sys

    sys.exit(main(sys.argv))