diff --git a/docs/zh/harness/USAGE.md b/docs/zh/harness/USAGE.md index d15d7595..503d4236 100644 --- a/docs/zh/harness/USAGE.md +++ b/docs/zh/harness/USAGE.md @@ -66,3 +66,22 @@ make harness-validate ``` 这是开发检查,不是普通用户工作流的一部分。 + +## Trust model — a governance contract, not a sandbox + +本地边界由协议和工程闸门执行(identity stamping、scope clamping、fail-closed +config、durable audit),**不是** OS 级隔离:同一用户下的恶意进程仍然可以读取本地文件。 +各层实际承诺如下: + +- **T0(始终):** governance contract;wire 只接收 observations,kernel 是唯一 writer, + 每个 decision 都可归因。 +- **T1(当前):** 本地加固;私有 state tree(`.mnemon/harness`、其 `local`/ + `channel` 目录以及两个 credentials 目录)保持 owner-only(0700,setup rerun 会修正); + token 为 0600;`local run` 默认拒绝非 loopback listen address,除非显式传入 + `--allow-nonloopback`;`mnemon-harness token rotate --principal <principal>
` 会强制轮转 bearer + token(撤销即轮转;token 启动时加载,因此需要重启 `local run` 生效)。 +- **T2(remote phase):** authn/authz、transport encryption 和 audit 是 remote + coordination plane 的 admission 条件,而不是事后补丁。 +- **T3(ecosystem phase):** signature chains 和 sandboxed rules。 + +OS/process 级隔离明确**不属于** T0/T1 承诺。 diff --git a/harness/cmd/mnemon-harness/acceptance.go b/harness/cmd/mnemon-harness/acceptance.go index c436b482..b24357ff 100644 --- a/harness/cmd/mnemon-harness/acceptance.go +++ b/harness/cmd/mnemon-harness/acceptance.go @@ -96,27 +96,33 @@ type r1CodexAcceptanceOptions struct { } type r1CodexAcceptanceReport struct { - SchemaVersion int `json:"schema_version"` - Status string `json:"status"` - StartedAt string `json:"started_at"` - FinishedAt string `json:"finished_at"` - RunRoot string `json:"run_root"` - ReportPath string `json:"report_path"` - Topology *r1AcceptanceTopologyReport `json:"topology,omitempty"` - LocalAddr string `json:"local_addr"` - AgentTurns bool `json:"agent_turns"` - Starter string `json:"starter,omitempty"` - Assignee string `json:"assignee,omitempty"` - Agents []r1CodexAgentReport `json:"agents"` - Sync *r1CodexSyncReport `json:"sync,omitempty"` - Scenarios []r1TaskSimScenarioReport `json:"scenarios,omitempty"` - LedgerCounts map[string]int `json:"ledger_counts,omitempty"` - DerivedEventAudit map[string]int `json:"derived_event_audit,omitempty"` - Observability *acceptanceObserveReport `json:"observability,omitempty"` - Assertions []r1AcceptanceAssertion `json:"assertions"` - Errors []string `json:"errors,omitempty"` - Artifacts map[string]string `json:"artifacts,omitempty"` - Raw map[string]json.RawMessage `json:"raw,omitempty"` + SchemaVersion int `json:"schema_version"` + Status string `json:"status"` + StartedAt string `json:"started_at"` + FinishedAt string `json:"finished_at"` + RunRoot string `json:"run_root"` + ReportPath string `json:"report_path"` + Scenario string `json:"scenario,omitempty"` + Seed int64 `json:"seed,omitempty"` + Topology 
*r1AcceptanceTopologyReport `json:"topology,omitempty"` + LocalAddr string `json:"local_addr"` + AgentTurns bool `json:"agent_turns"` + Starter string `json:"starter,omitempty"` + Entrypoint string `json:"entrypoint,omitempty"` + Assignee string `json:"assignee,omitempty"` + Agents []r1CodexAgentReport `json:"agents"` + Sync *r1CodexSyncReport `json:"sync,omitempty"` + Scenarios []r1TaskSimScenarioReport `json:"scenarios,omitempty"` + RunnerContract *r1RunnerContractReport `json:"runner_contract,omitempty"` + Participants []r1ClusterParticipantReport `json:"participants,omitempty"` + Findings []r1ClusterFindingReport `json:"findings,omitempty"` + LedgerCounts map[string]int `json:"ledger_counts,omitempty"` + DerivedEventAudit map[string]int `json:"derived_event_audit,omitempty"` + Observability *acceptanceObserveReport `json:"observability,omitempty"` + Assertions []r1AcceptanceAssertion `json:"assertions"` + Errors []string `json:"errors,omitempty"` + Artifacts map[string]string `json:"artifacts,omitempty"` + Raw map[string]json.RawMessage `json:"raw,omitempty"` } type r1AcceptanceTopologyReport struct { @@ -438,7 +444,7 @@ func setupR1CodexAgents(runRoot, binDir, controlURL string, count int, sourceCod workspace: workspace, codexHome: codexHome, token: token, - env: acceptanceEnv(binDir, codexHome), + env: acceptanceEnv(binDir, codexHome, runRoot), }) } return agents, loaded, nil @@ -508,10 +514,21 @@ func copyRegularFile(src, dst string, mode os.FileMode) error { return out.Close() } -func acceptanceEnv(binDir, codexHome string) []string { +func acceptanceEnv(binDir, codexHome string, gitCeilingDirs ...string) []string { env := os.Environ() env = setEnv(env, "CODEX_HOME", codexHome) env = setEnv(env, "PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH")) + if len(gitCeilingDirs) > 0 { + var dirs []string + for _, dir := range gitCeilingDirs { + if dir != "" { + dirs = append(dirs, dir) + } + } + if len(dirs) > 0 { + env = setEnv(env, 
"GIT_CEILING_DIRECTORIES", strings.Join(dirs, string(os.PathListSeparator))) + } + } return env } @@ -933,6 +950,7 @@ func startR1SyncHub(runRoot string, count int) (r1SyncHub, error) { } scopes := []contract.ResourceRef{ {Kind: "agent_profile", ID: "project"}, + {Kind: "project_intent", ID: "project"}, {Kind: "teamwork_signal", ID: "project"}, {Kind: "assignment", ID: "project"}, {Kind: "progress_digest", ID: "project"}, @@ -1020,6 +1038,7 @@ func r1SyncEventSubjectsOnlyAccepted(labels []string) bool { "agent_profile:project": true, "assignment:project": true, "progress_digest:project": true, + "project_intent:project": true, "teamwork_signal:project": true, } for _, label := range labels { @@ -1087,7 +1106,7 @@ func setupR1CodexSyncAgents(ctx context.Context, runRoot, binDir string, hub r1S workspace: workspace, codexHome: codexHome, token: token, - env: acceptanceEnv(binDir, codexHome), + env: acceptanceEnv(binDir, codexHome, runRoot), }, localURL: localURL, replicaPrincipal: hub.Principals[i-1], @@ -1206,6 +1225,7 @@ func waitForLedgerCount(controlURL string, agent r1CodexAgent, kind string, want func countR1Ledger(controlURL string, agent r1CodexAgent) map[string]int { out := map[string]int{ "agent_profile": 0, + "project_intent": 0, "teamwork_signal": 0, "assignment": 0, "progress_digest": 0, diff --git a/harness/cmd/mnemon-harness/acceptance_cluster_single_entrypoint.go b/harness/cmd/mnemon-harness/acceptance_cluster_single_entrypoint.go new file mode 100644 index 00000000..d9ed9a8b --- /dev/null +++ b/harness/cmd/mnemon-harness/acceptance_cluster_single_entrypoint.go @@ -0,0 +1,878 @@ +package main + +import ( + "context" + "encoding/json" + "fmt" + "io" + "math/rand" + "os" + "path/filepath" + "sort" + "strings" + "time" + + "github.com/mnemon-dev/mnemon/harness/internal/mnemond/access" + "github.com/spf13/cobra" +) + +var ( + acceptanceClusterScenario string + acceptanceClusterSeed int64 + acceptanceClusterWakeCycles int + acceptanceClusterWakeInterval 
time.Duration + acceptanceClusterEntrypoint string +) + +const r1ClusterWorkerWakePrompt = `Check your Mnemon context. If there is governed work for you, act on it through +your own Local Mnemon and record durable progress. If there is no work for you, +answer "no governed work".` + +var acceptanceR1ClusterSingleEntrypointCmd = &cobra.Command{ + Use: "r1-cluster-single-entrypoint", + Short: "Run productization phase-1 single-entrypoint cluster validation", + RunE: func(cmd *cobra.Command, args []string) error { + report, err := runR1ClusterSingleEntrypointAcceptance(cmd.Context(), r1ClusterSingleEntrypointOptions{ + r1CodexAcceptanceOptions: r1CodexAcceptanceOptions{ + RunRoot: acceptanceRunRoot, + Command: acceptanceCommand, + CodexHome: acceptanceCodexHome, + Agents: acceptanceAgents, + AgentTurns: acceptanceAgentTurns, + TurnTimeout: acceptanceTurnTimeout, + Stdout: cmd.OutOrStdout(), + Stderr: cmd.ErrOrStderr(), + }, + Scenario: acceptanceClusterScenario, + Seed: acceptanceClusterSeed, + WakeCycles: acceptanceClusterWakeCycles, + WakeInterval: acceptanceClusterWakeInterval, + Entrypoint: acceptanceClusterEntrypoint, + }) + if report.ReportPath != "" { + fmt.Fprintf(cmd.OutOrStdout(), "acceptance report: %s\n", report.ReportPath) + } + if err != nil { + return err + } + if report.Status != "ok" { + return fmt.Errorf("R1 cluster single-entrypoint acceptance status: %s", report.Status) + } + return nil + }, +} + +func init() { + acceptanceR1ClusterSingleEntrypointCmd.Flags().StringVar(&acceptanceRunRoot, "run-root", "", "acceptance run directory") + acceptanceR1ClusterSingleEntrypointCmd.Flags().StringVar(&acceptanceCommand, "command", "codex --dangerously-bypass-hook-trust", "Codex CLI command") + acceptanceR1ClusterSingleEntrypointCmd.Flags().StringVar(&acceptanceCodexHome, "codex-home-source", "", "source CODEX_HOME to copy auth/config from") + acceptanceR1ClusterSingleEntrypointCmd.Flags().IntVar(&acceptanceAgents, "agents", 5, "number of Codex appservers") + 
acceptanceR1ClusterSingleEntrypointCmd.Flags().BoolVar(&acceptanceAgentTurns, "agent-turns", false, "run real model turns that write governed R1 cluster events") + acceptanceR1ClusterSingleEntrypointCmd.Flags().DurationVar(&acceptanceTurnTimeout, "turn-timeout", 5*time.Minute, "timeout per real agent turn") + acceptanceR1ClusterSingleEntrypointCmd.Flags().StringVar(&acceptanceClusterScenario, "scenario", "project-validation", "phase-1 scenario: project-validation or seeded-defect") + acceptanceR1ClusterSingleEntrypointCmd.Flags().Int64Var(&acceptanceClusterSeed, "seed", 0, "random seed for entrypoint selection; defaults to current time") + acceptanceR1ClusterSingleEntrypointCmd.Flags().IntVar(&acceptanceClusterWakeCycles, "wake-cycles", 4, "generic worker wake cycles") + acceptanceR1ClusterSingleEntrypointCmd.Flags().DurationVar(&acceptanceClusterWakeInterval, "wake-interval", 3*time.Second, "delay between worker wake cycles") + acceptanceR1ClusterSingleEntrypointCmd.Flags().StringVar(&acceptanceClusterEntrypoint, "entrypoint", "", "explicit entrypoint principal; empty chooses by seed") + acceptanceCmd.AddCommand(acceptanceR1ClusterSingleEntrypointCmd) +} + +type r1ClusterSingleEntrypointOptions struct { + r1CodexAcceptanceOptions + Scenario string + Seed int64 + WakeCycles int + WakeInterval time.Duration + Entrypoint string +} + +type r1RunnerContractReport struct { + ProfileBootstrapPrompts int `json:"profile_bootstrap_prompts"` + BusinessTaskPrompts int `json:"business_task_prompts"` + WorkerWakePrompts int `json:"worker_wake_prompts"` + DirectWorkerBusinessPrompts int `json:"direct_worker_business_prompts"` + IntegrationPrompts int `json:"integration_prompts"` + ManualEventWrites int `json:"manual_event_writes"` + EntrypointProgressBeforeIntegration int `json:"entrypoint_progress_before_integration"` + EntrypointProgressAfterIntegration int `json:"entrypoint_progress_after_integration"` + SyncSettleSeconds int `json:"sync_settle_seconds,omitempty"` + 
WorkerWakePrompt string `json:"worker_wake_prompt"` + EntryBusinessPrompt string `json:"entry_business_prompt,omitempty"` + IntegrationPrompt string `json:"integration_prompt,omitempty"` + PromptAudit []r1RunnerPromptReport `json:"prompt_audit,omitempty"` + WorkerWakeErrors []string `json:"worker_wake_errors,omitempty"` +} + +type r1RunnerPromptReport struct { + Index int `json:"index"` + Principal string `json:"principal"` + Kind string `json:"kind"` + Prompt string `json:"prompt"` +} + +type r1ClusterParticipantReport struct { + Principal string `json:"principal"` + Roles []string `json:"roles"` + EventCounts map[string]int `json:"event_counts"` +} + +type r1ClusterFindingReport struct { + Kind string `json:"kind"` + Summary string `json:"summary"` + Evidence string `json:"evidence"` + Resolved bool `json:"resolved"` +} + +func runR1ClusterSingleEntrypointAcceptance(ctx context.Context, opts r1ClusterSingleEntrypointOptions) (r1CodexAcceptanceReport, error) { + if opts.Stdout == nil { + opts.Stdout = io.Discard + } + if opts.Stderr == nil { + opts.Stderr = io.Discard + } + if opts.Command == "" { + opts.Command = "codex" + } + if opts.Agents < 5 { + opts.Agents = 5 + } + if opts.TurnTimeout <= 0 { + opts.TurnTimeout = 5 * time.Minute + } + if opts.WakeCycles <= 0 { + opts.WakeCycles = 4 + } + if opts.WakeInterval <= 0 { + opts.WakeInterval = 3 * time.Second + } + if opts.Scenario == "" { + opts.Scenario = "project-validation" + } + if opts.Scenario != "project-validation" && opts.Scenario != "seeded-defect" { + return r1CodexAcceptanceReport{}, fmt.Errorf("unsupported cluster scenario %q", opts.Scenario) + } + if opts.Seed == 0 { + opts.Seed = time.Now().UnixNano() + } + started := time.Now().UTC().Truncate(time.Second) + runRoot := opts.RunRoot + if runRoot == "" { + runRoot = filepath.Join(".testdata", "r1-cluster-single-entrypoint", started.Format("20060102T150405Z")) + } + runRoot, err := filepath.Abs(runRoot) + if err != nil { + return 
r1CodexAcceptanceReport{}, err + } + report := r1CodexAcceptanceReport{ + SchemaVersion: 1, + Status: "running", + StartedAt: started.Format(time.RFC3339), + RunRoot: runRoot, + Scenario: opts.Scenario, + Seed: opts.Seed, + AgentTurns: opts.AgentTurns, + LedgerCounts: map[string]int{}, + DerivedEventAudit: map[string]int{}, + Artifacts: map[string]string{}, + Raw: map[string]json.RawMessage{}, + RunnerContract: &r1RunnerContractReport{ + EntrypointProgressBeforeIntegration: -1, + EntrypointProgressAfterIntegration: -1, + WorkerWakePrompt: r1ClusterWorkerWakePrompt, + }, + } + reportPath := filepath.Join(runRoot, "report.json") + report.ReportPath = reportPath + defer func() { + report.FinishedAt = time.Now().UTC().Truncate(time.Second).Format(time.RFC3339) + _ = os.MkdirAll(filepath.Dir(reportPath), 0o755) + data, _ := json.MarshalIndent(report, "", " ") + _ = os.WriteFile(reportPath, append(data, '\n'), 0o644) + }() + + if err := prepareR1AcceptanceRunRoot(runRoot); err != nil { + addR1Error(&report, err) + report.Status = "blocked" + return report, err + } + binDir, err := installAcceptanceHarnessBinary(runRoot) + if err != nil { + addR1Error(&report, err) + report.Status = "blocked" + return report, err + } + hub, err := startR1SyncHub(runRoot, opts.Agents) + if err != nil { + addR1Error(&report, err) + report.Status = "blocked" + return report, err + } + defer hub.close() + sourceCodexHome := resolveSourceCodexHome(opts.CodexHome) + sourceRoot, _ := filepath.Abs(".") + report.Artifacts["codex_home_source"] = sourceCodexHome + report.Artifacts["project_snapshot_source"] = sourceRoot + report.Artifacts["hub_db"] = filepath.Join(runRoot, "hub", "hub.db") + report.Artifacts["hub_audit"] = hub.AuditPath + + agents, err := setupR1CodexSyncAgents(ctx, runRoot, binDir, hub, opts.Agents, sourceCodexHome) + if err != nil { + addR1Error(&report, err) + report.Status = "blocked" + return report, err + } + defer stopR1CodexSyncAgents(agents) + for i := range agents { + if 
err := copyR1ClusterProjectSnapshot(sourceRoot, agents[i].workspace, runRoot); err != nil { + addR1Error(&report, fmt.Errorf("%s: copy project snapshot: %w", agents[i].principal, err)) + report.Status = "blocked" + return report, err + } + } + if opts.Scenario == "seeded-defect" { + rel, err := applyR1ClusterSeededDefect(agents) + if err != nil { + addR1Error(&report, err) + report.Status = "blocked" + return report, err + } + report.Artifacts["seeded_defect"] = rel + } + + report.Topology = buildR1ProdSimTopology(agents) + addR1Assertion(&report, "cluster strict per-hostagent mnemond topology", prodSimStrictTopology(report.Topology), fmt.Sprintf("%+v", report.Topology)) + for _, agent := range agents { + report.Artifacts["mnemond:"+agent.principal] = prodSimMnemondPath(agent) + report.Artifacts["render_audit:"+agent.principal] = agent.renderAuditPath + } + syncReport := &r1CodexSyncReport{ + Status: "running", + HubURL: hub.URL, + AllowedEventSubjects: hub.AllowedEventSubjects, + Agents: []r1CodexAgentReport{}, + Artifacts: map[string]string{ + "hub_db": report.Artifacts["hub_db"], + "hub_audit": hub.AuditPath, + }, + } + report.Sync = syncReport + + for i := range agents { + if err := startR1CodexAppserver(&agents[i].r1CodexAgent, opts.Command); err != nil { + addR1Error(&report, err) + report.Status = "blocked" + return report, err + } + agentReport, raw, err := initializeR1CodexAgent(&agents[i].r1CodexAgent, opts.TurnTimeout) + if err != nil { + addR1Error(&report, err) + report.Status = "blocked" + return report, err + } + syncReport.Agents = append(syncReport.Agents, agentReport) + report.Agents = append(report.Agents, agentReport) + if raw != nil { + report.Raw[agents[i].principal+":hooks"] = raw + } + } + addR1Assertion(&report, "cluster 5/5 appservers start/init", len(report.Agents) == opts.Agents, fmt.Sprintf("started=%d requested=%d", len(report.Agents), opts.Agents)) + addR1ClusterHookAssertions(&report, opts.r1CodexAcceptanceOptions) + if 
!opts.AgentTurns { + addR1Assertion(&report, "cluster real agent turns requested", false, "rerun with --agent-turns") + report.Status = "failed" + return report, fmt.Errorf("R1 cluster single-entrypoint acceptance requires --agent-turns") + } + + run := prodSimRun{ + ctx: ctx, + opts: r1ProdSimAcceptanceOptions{r1CodexAcceptanceOptions: opts.r1CodexAcceptanceOptions}, + report: &report, + agents: agents, + runID: started.Format("150405"), + } + report.RunnerContract.ProfileBootstrapPrompts = len(agents) + if err := run.bootstrapProfiles(); err != nil { + addR1Error(&report, err) + } + + entryIndex, err := chooseR1ClusterEntrypoint(agents, opts.Entrypoint, opts.Seed) + if err != nil { + addR1Error(&report, err) + report.Status = "blocked" + return report, err + } + entrypoint := &agents[entryIndex] + report.Entrypoint = entrypoint.principal + report.Starter = entrypoint.principal + syncReport.Source = entrypoint.principal + addR1Assertion(&report, "cluster entrypoint selected by seed or explicit flag", entrypoint.principal != "", fmt.Sprintf("entrypoint=%s seed=%d explicit=%q", entrypoint.principal, opts.Seed, opts.Entrypoint)) + + businessPrompt := r1ClusterBusinessTaskPrompt(opts.Scenario) + report.RunnerContract.EntryBusinessPrompt = businessPrompt + report.RunnerContract.BusinessTaskPrompts++ + recordR1ClusterPrompt(report.RunnerContract, entrypoint.principal, "business_task", businessPrompt) + answer, err := runR1Turn(&entrypoint.r1CodexAgent, businessPrompt, opts.TurnTimeout) + appendSyncAgentAnswer(syncReport, entrypoint.principal, answer) + if err != nil { + addR1Assertion(&report, "cluster entrypoint accepts single business prompt", false, err.Error()) + addR1Error(&report, err) + } else { + addR1Assertion(&report, "cluster entrypoint accepts single business prompt", true, truncateR1Cluster(answer, 300)) + } + waitForLedgerCount(entrypoint.localURL, entrypoint.r1CodexAgent, "assignment", 2, 60*time.Second) + + for cycle := 1; cycle <= opts.WakeCycles; 
cycle++ { + if ctx.Err() != nil { + addR1Error(&report, ctx.Err()) + break + } + for i := range agents { + if i == entryIndex { + continue + } + worker := &agents[i] + report.RunnerContract.WorkerWakePrompts++ + recordR1ClusterPrompt(report.RunnerContract, worker.principal, "worker_wake", r1ClusterWorkerWakePrompt) + answer, err := runR1Turn(&worker.r1CodexAgent, r1ClusterWorkerWakePrompt, opts.TurnTimeout) + appendSyncAgentAnswer(syncReport, worker.principal, answer) + if err != nil { + report.RunnerContract.WorkerWakeErrors = append(report.RunnerContract.WorkerWakeErrors, fmt.Sprintf("cycle %d %s: %v", cycle, worker.principal, err)) + } + } + obs, err := observeAcceptanceRun(runRoot, 1000) + if err == nil && r1ClusterProgressReady(r1ClusterActorEventCounts(obs), entrypoint.principal) { + break + } + if cycle == opts.WakeCycles { + break + } + select { + case <-ctx.Done(): + addR1Error(&report, ctx.Err()) + cycle = opts.WakeCycles + case <-time.After(opts.WakeInterval): + } + } + + report.RunnerContract.SyncSettleSeconds = int((10 * time.Second).Seconds()) + waitR1ClusterAcceptedEventSettle(runRoot, 10*time.Second, 2*time.Second) + if obs, err := observeAcceptanceRun(runRoot, 1000); err == nil { + report.RunnerContract.EntrypointProgressBeforeIntegration = r1ClusterActorEventCounts(obs)[entrypoint.principal]["progress_digest"] + } else { + addR1Error(&report, fmt.Errorf("pre-integration observe: %w", err)) + } + integrationPrompt := r1ClusterIntegrationPrompt() + report.RunnerContract.IntegrationPrompt = integrationPrompt + report.RunnerContract.IntegrationPrompts++ + recordR1ClusterPrompt(report.RunnerContract, entrypoint.principal, "integration", integrationPrompt) + answer, err = runR1Turn(&entrypoint.r1CodexAgent, integrationPrompt, opts.TurnTimeout) + appendSyncAgentAnswer(syncReport, entrypoint.principal, answer) + if err != nil { + addR1Assertion(&report, "cluster entrypoint final integration turn completes", false, err.Error()) + addR1Error(&report, err) + 
} else { + addR1Assertion(&report, "cluster entrypoint final integration turn completes", true, truncateR1Cluster(answer, 300)) + } + waitForLedgerCount(entrypoint.localURL, entrypoint.r1CodexAgent, "progress_digest", 3, 30*time.Second) + + if client, err := access.NewSyncClient(hub.URL, access.SyncClientConfig{Token: entrypoint.replicaToken}); err == nil { + syncReport.HubStatus, err = client.SyncStatus() + if err != nil { + addR1Assertion(&report, "cluster mnemonhub status readable", false, err.Error()) + } else { + addR1Assertion(&report, "cluster mnemonhub exchanges accepted events", syncReport.HubStatus.HubEventsReceived > 0 && syncReport.HubStatus.HubEventsServed > 0, fmt.Sprintf("received=%d served=%d", syncReport.HubStatus.HubEventsReceived, syncReport.HubStatus.HubEventsServed)) + } + } else { + addR1Assertion(&report, "cluster mnemonhub status readable", false, err.Error()) + } + + report.LedgerCounts = countR1Ledger(entrypoint.localURL, entrypoint.r1CodexAgent) + report.DerivedEventAudit = prodSimDerivedAudit(agents) + obs, obsErr := observeAcceptanceRun(runRoot, 1000) + if obsErr == nil { + report.Observability = &obs + } else { + addR1Error(&report, obsErr) + } + actorCounts := r1ClusterActorEventCounts(obs) + report.RunnerContract.EntrypointProgressAfterIntegration = actorCounts[entrypoint.principal]["progress_digest"] + report.Participants = r1ClusterParticipants(actorCounts, entrypoint.principal) + finalAnswer := latestR1ClusterAgentAnswer(syncReport, entrypoint.principal) + report.Findings = []r1ClusterFindingReport{r1ClusterFindingFromAnswer(finalAnswer, report.LedgerCounts)} + + addR1ClusterAuditAssertions(&report, syncReport, actorCounts, finalAnswer, opts.WakeCycles) + if report.Observability != nil { + addR1Assertion(&report, "cluster observability sees strict topology", report.Observability.Topology.Mode == "per-hostagent-mnemond" && !report.Observability.Topology.SharedMnemond, fmt.Sprintf("mode=%s shared=%t mnemond=%d hub=%d", 
report.Observability.Topology.Mode, report.Observability.Topology.SharedMnemond, report.Observability.Topology.MnemondStores, report.Observability.Topology.MnemonhubStores)) + } else { + addR1Assertion(&report, "cluster observability sees strict topology", false, "observe report unavailable") + } + + scenarioOK := len(report.Errors) == 0 && allR1AssertionsPassed(report.Assertions) + report.Scenarios = append(report.Scenarios, r1TaskSimScenarioReport{ + Name: "cluster_single_entrypoint", + Status: statusFromBool(scenarioOK), + Actors: r1ClusterParticipantPrincipals(report.Participants), + Evidence: map[string]any{ + "entrypoint": entrypoint.principal, + "seed": opts.Seed, + "ledger_counts": report.LedgerCounts, + }, + }) + syncReport.Status = statusFromBool(scenarioOK) + if scenarioOK { + report.Status = "ok" + return report, nil + } + report.Status = "failed" + return report, fmt.Errorf("R1 cluster single-entrypoint acceptance failed") +} + +func addR1ClusterHookAssertions(report *r1CodexAcceptanceReport, opts r1CodexAcceptanceOptions) { + allHooks := true + allTrusted := true + for _, ar := range report.Agents { + if ar.HookCount < 4 || !ar.ManualHookReminded { + allHooks = false + } + for _, st := range ar.HookTrustStatuses { + if st != "trusted" && st != "managed" { + allTrusted = false + } + } + } + addR1Assertion(report, "cluster preflight hooks discovered and remind", allHooks, "each appserver lists R1 hooks and manual lifecycle reminder succeeds") + hookTrustApproved := allTrusted || strings.Contains(opts.Command, "--dangerously-bypass-hook-trust") + hookTrustDetail := "trust status must be trusted or managed for generic lifecycle hook proof" + if !allTrusted && hookTrustApproved { + hookTrustDetail = "project hooks list as untrusted, but this appserver invocation used --dangerously-bypass-hook-trust as explicit operator approval" + } + addR1Assertion(report, "cluster preflight project hooks approved", hookTrustApproved, hookTrustDetail) +} + +func 
addR1ClusterAuditAssertions(report *r1CodexAcceptanceReport, syncReport *r1CodexSyncReport, actorCounts map[string]map[string]int, finalAnswer string, wakeCycles int) { + entrypoint := report.Entrypoint + workerProgress := r1ClusterWorkerProgressActors(actorCounts, entrypoint) + nonProfileParticipants := r1ClusterNonProfileParticipantCount(actorCounts) + addR1Assertion(report, "cluster exactly one business task prompt before worker wakes", report.RunnerContract != nil && report.RunnerContract.BusinessTaskPrompts == 1 && r1ClusterBusinessBeforeWake(report.RunnerContract), fmt.Sprintf("business=%d wake=%d", report.RunnerContract.BusinessTaskPrompts, report.RunnerContract.WorkerWakePrompts)) + addR1Assertion(report, "cluster non-entrypoint prompts are generic wakes only", report.RunnerContract != nil && report.RunnerContract.DirectWorkerBusinessPrompts == 0 && r1ClusterWorkerPromptsGeneric(report.RunnerContract), fmt.Sprintf("worker_wake_prompts=%d direct_worker_business=%d", report.RunnerContract.WorkerWakePrompts, report.RunnerContract.DirectWorkerBusinessPrompts)) + addR1Assertion(report, "cluster runner wakes every non-entrypoint through generic prompt", report.RunnerContract != nil && r1ClusterWokeAllNonEntrypoints(report.RunnerContract, report.Agents, entrypoint), fmt.Sprintf("worker_wake_prompts=%d agents=%d entrypoint=%s", report.RunnerContract.WorkerWakePrompts, len(report.Agents), entrypoint)) + addR1Assertion(report, "cluster at least three hostagents participate through accepted events", nonProfileParticipants >= 3, fmt.Sprintf("participants=%d actor_counts=%v", nonProfileParticipants, actorCounts)) + addR1Assertion(report, "cluster entrypoint emits coordination events", actorCounts[entrypoint]["teamwork_signal"] >= 1 && actorCounts[entrypoint]["assignment"] >= 2, fmt.Sprintf("entrypoint=%s counts=%v", entrypoint, actorCounts[entrypoint])) + addR1Assertion(report, "cluster entrypoint records project intent or integration progress", 
actorCounts[entrypoint]["project_intent"] >= 1 || actorCounts[entrypoint]["progress_digest"] >= 1, fmt.Sprintf("entrypoint=%s counts=%v", entrypoint, actorCounts[entrypoint])) + addR1Assertion(report, "cluster entrypoint records final integration progress", report.RunnerContract.EntrypointProgressBeforeIntegration >= 0 && report.RunnerContract.EntrypointProgressAfterIntegration > report.RunnerContract.EntrypointProgressBeforeIntegration, fmt.Sprintf("entrypoint_progress_before=%d after=%d", report.RunnerContract.EntrypointProgressBeforeIntegration, report.RunnerContract.EntrypointProgressAfterIntegration)) + addR1Assertion(report, "cluster workers act because of Mnemon context", len(workerProgress) >= 2 && report.RunnerContract.DirectWorkerBusinessPrompts == 0, fmt.Sprintf("worker_progress_actors=%v", workerProgress)) + addR1Assertion(report, "cluster at least two non-entrypoint progress_digest actors", len(workerProgress) >= 2, fmt.Sprintf("worker_progress_actors=%v", workerProgress)) + addR1Assertion(report, "cluster entrypoint reads worker progress and returns integrated answer", report.LedgerCounts["progress_digest"] >= 2 && r1ClusterFinalAnswerCitesEvidence(finalAnswer), fmt.Sprintf("progress_digest=%d final=%s", report.LedgerCounts["progress_digest"], truncateR1Cluster(finalAnswer, 400))) + addR1Assertion(report, "cluster mnemonhub moves accepted event subjects only", r1SyncEventSubjectsOnlyAccepted(syncReport.AllowedEventSubjects) && syncReport.HubStatus.HubEventsReceived > 0 && syncReport.HubStatus.HubEventsServed > 0, fmt.Sprintf("event_subjects=%v received=%d served=%d", syncReport.AllowedEventSubjects, syncReport.HubStatus.HubEventsReceived, syncReport.HubStatus.HubEventsServed)) + addR1Assertion(report, "cluster no assignment_status/assignment_expired invented", report.LedgerCounts["assignment_status"] == 0 && report.LedgerCounts["assignment_expired"] == 0, fmt.Sprintf("assignment_status=%d assignment_expired=%d", 
report.LedgerCounts["assignment_status"], report.LedgerCounts["assignment_expired"])) + addR1Assertion(report, "cluster no manual event repair", report.RunnerContract != nil && report.RunnerContract.ManualEventWrites == 0, fmt.Sprintf("manual_event_writes=%d", report.RunnerContract.ManualEventWrites)) + addR1Assertion(report, "cluster wake cycles bounded by runner contract", wakeCycles > 0 && report.RunnerContract.WorkerWakePrompts <= wakeCycles*(len(report.Agents)-1), fmt.Sprintf("wake_prompts=%d wake_cycles=%d agents=%d", report.RunnerContract.WorkerWakePrompts, wakeCycles, len(report.Agents))) +} + +func r1ClusterBusinessTaskPrompt(scenario string) string { + extra := "" + if scenario == "seeded-defect" { + extra = "\nThis run may include a seeded repository defect. Treat it as ordinary project state: discover it from inspection or tests, then coordinate any fix or review through Mnemon." + } + return `You are the entrypoint for a Mnemon agent cluster. + +Using Mnemon as the only cluster coordination channel, organize the cluster to +test this repository in a realistic way: + +1. Establish the validation objective. +2. Inspect the current project state in this workspace. +3. Delegate at least two distinct validation or review tasks to other agents. +4. Have workers report findings through Mnemon. +5. Identify at least one concrete issue, risk, or gap if one exists. +6. Analyze the issue and either fix it, propose a minimal fix, or explain why no + code change is justified. +7. Ask for independent review when useful. +8. Integrate the final result and answer the user with evidence. + +Read available agent profiles through your own Local Mnemon before assigning +work. Choose assignees yourself from Mnemon context. Use the standard governed +events project_intent, teamwork_signal, assignment, and progress_digest as the +durable coordination channel, following the managed GUIDE and observe skill. +Do not message workers directly. 
Do not wait for worker output in this turn: +create the coordination graph, then answer briefly with the events you recorded.` + extra +} + +func r1ClusterIntegrationPrompt() string { + return `Read your Mnemon context through your own Local Mnemon and integrate the cluster work for the user. +Use only Mnemon events as worker evidence. Do not use runner wake answers as evidence. +If the cluster result is ready, record a final progress_digest through your own Local Mnemon. +Answer with participants, event-backed evidence, concrete issue/risk/gap or no-defect rationale, fix/proposed fix/no-code-change decision, and remaining risk.` +} + +func chooseR1ClusterEntrypoint(agents []r1CodexSyncAgent, explicit string, seed int64) (int, error) { + if len(agents) == 0 { + return -1, fmt.Errorf("no agents available") + } + explicit = strings.TrimSpace(explicit) + if explicit != "" { + for i := range agents { + if agents[i].principal == explicit { + return i, nil + } + } + return -1, fmt.Errorf("entrypoint %q is not one of the appservers", explicit) + } + rng := rand.New(rand.NewSource(seed)) + return rng.Intn(len(agents)), nil +} + +func recordR1ClusterPrompt(contract *r1RunnerContractReport, principal, kind, prompt string) { + if contract == nil { + return + } + contract.PromptAudit = append(contract.PromptAudit, r1RunnerPromptReport{ + Index: len(contract.PromptAudit) + 1, + Principal: principal, + Kind: kind, + Prompt: truncateR1Cluster(prompt, 2000), + }) +} + +func r1ClusterBusinessBeforeWake(contract *r1RunnerContractReport) bool { + if contract == nil { + return false + } + businessSeen := 0 + for _, prompt := range contract.PromptAudit { + switch prompt.Kind { + case "business_task": + businessSeen++ + case "worker_wake": + return businessSeen == 1 + } + } + return businessSeen == 1 +} + +func r1ClusterWorkerPromptsGeneric(contract *r1RunnerContractReport) bool { + if contract == nil { + return false + } + for _, prompt := range contract.PromptAudit { + if prompt.Kind 
== "worker_wake" && prompt.Prompt != r1ClusterWorkerWakePrompt { + return false + } + } + return true +} + +func r1ClusterWokeAllNonEntrypoints(contract *r1RunnerContractReport, agents []r1CodexAgentReport, entrypoint string) bool { + if contract == nil { + return false + } + woke := map[string]bool{} + for _, prompt := range contract.PromptAudit { + if prompt.Kind == "worker_wake" { + woke[prompt.Principal] = true + } + } + workers := 0 + for _, agent := range agents { + if agent.Principal == entrypoint { + continue + } + workers++ + if !woke[agent.Principal] { + return false + } + } + return workers > 0 +} + +func waitR1ClusterAcceptedEventSettle(runRoot string, timeout, stableFor time.Duration) { + deadline := time.Now().Add(timeout) + lastCount := -1 + stableSince := time.Now() + for time.Now().Before(deadline) { + obs, err := observeAcceptanceRun(runRoot, 1000) + if err == nil { + count := r1ClusterAcceptedEventCount(obs) + if count != lastCount { + lastCount = count + stableSince = time.Now() + } else if time.Since(stableSince) >= stableFor { + return + } + } + time.Sleep(500 * time.Millisecond) + } +} + +func r1ClusterAcceptedEventCount(obs acceptanceObserveReport) int { + count := 0 + for _, ev := range obs.CrossEvents { + if ev.Status == "accepted" { + count++ + } + } + return count +} + +func r1ClusterActorEventCounts(obs acceptanceObserveReport) map[string]map[string]int { + out := map[string]map[string]int{} + for _, ev := range obs.CrossEvents { + if ev.Status != "accepted" || ev.Actor == "" { + continue + } + kind := r1ClusterKindFromEventSubject(ev.EventSubject) + if kind == "" { + continue + } + if out[ev.Actor] == nil { + out[ev.Actor] = map[string]int{} + } + out[ev.Actor][kind]++ + } + return out +} + +func r1ClusterKindFromEventSubject(subject string) string { + if idx := strings.Index(subject, "/"); idx > 0 { + return subject[:idx] + } + if idx := strings.Index(subject, ":"); idx > 0 { + return subject[:idx] + } + return "" +} + +func 
r1ClusterProgressReady(actorCounts map[string]map[string]int, entrypoint string) bool { + return actorCounts[entrypoint]["teamwork_signal"] >= 1 && + actorCounts[entrypoint]["assignment"] >= 2 && + len(r1ClusterWorkerProgressActors(actorCounts, entrypoint)) >= 2 +} + +func r1ClusterWorkerProgressActors(actorCounts map[string]map[string]int, entrypoint string) []string { + var out []string + for actor, counts := range actorCounts { + if actor == entrypoint { + continue + } + if counts["progress_digest"] > 0 { + out = append(out, actor) + } + } + sort.Strings(out) + return out +} + +func r1ClusterNonProfileParticipantCount(actorCounts map[string]map[string]int) int { + count := 0 + for _, kinds := range actorCounts { + for kind, n := range kinds { + if kind != "agent_profile" && n > 0 { + count++ + break + } + } + } + return count +} + +func r1ClusterParticipants(actorCounts map[string]map[string]int, entrypoint string) []r1ClusterParticipantReport { + var principals []string + for principal := range actorCounts { + principals = append(principals, principal) + } + sort.Strings(principals) + out := make([]r1ClusterParticipantReport, 0, len(principals)) + for _, principal := range principals { + counts := actorCounts[principal] + var roles []string + if principal == entrypoint { + roles = append(roles, "entrypoint") + } + if counts["project_intent"] > 0 || counts["teamwork_signal"] > 0 || counts["assignment"] > 0 { + roles = append(roles, "coordinator") + } + if counts["progress_digest"] > 0 && principal != entrypoint { + roles = append(roles, "worker") + } + if counts["agent_profile"] > 0 { + roles = append(roles, "profiled") + } + sort.Strings(roles) + out = append(out, r1ClusterParticipantReport{Principal: principal, Roles: roles, EventCounts: counts}) + } + return out +} + +func r1ClusterParticipantPrincipals(participants []r1ClusterParticipantReport) []string { + out := make([]string, 0, len(participants)) + for _, p := range participants { + out = append(out, 
p.Principal) + } + sort.Strings(out) + return out +} + +func latestR1ClusterAgentAnswer(report *r1CodexSyncReport, principal string) string { + if report == nil { + return "" + } + for _, agent := range report.Agents { + if agent.Principal != principal || len(agent.FinalAnswers) == 0 { + continue + } + return agent.FinalAnswers[len(agent.FinalAnswers)-1] + } + return "" +} + +func r1ClusterFinalAnswerCitesEvidence(answer string) bool { + lower := strings.ToLower(answer) + if strings.TrimSpace(lower) == "" { + return false + } + for _, needle := range []string{"event", "mnemon", "assignment", "progress", "evidence", "agent"} { + if strings.Contains(lower, needle) { + return true + } + } + return false +} + +func r1ClusterFindingFromAnswer(answer string, counts map[string]int) r1ClusterFindingReport { + kind := "unknown" + lower := strings.ToLower(answer) + switch { + case strings.Contains(lower, "no defect") || strings.Contains(lower, "no concrete defect") || strings.Contains(lower, "no code change"): + kind = "no-defect" + case strings.Contains(lower, "defect") || strings.Contains(lower, "bug") || strings.Contains(lower, "issue"): + kind = "issue" + case strings.Contains(lower, "risk") || strings.Contains(lower, "gap"): + kind = "risk" + } + resolved := strings.Contains(lower, "fixed") || + strings.Contains(lower, "resolved") || + strings.Contains(lower, "no code change") || + strings.Contains(lower, "applied the reviewed minimal fix") || + strings.Contains(lower, "applied the minimal fix") || + strings.Contains(lower, "applied fix") + return r1ClusterFindingReport{ + Kind: kind, + Summary: truncateR1Cluster(strings.TrimSpace(answer), 800), + Evidence: fmt.Sprintf("ledger_counts=%v", counts), + Resolved: resolved, + } +} + +func copyR1ClusterProjectSnapshot(sourceRoot, workspace, runRoot string) error { + sourceRoot, err := filepath.Abs(sourceRoot) + if err != nil { + return err + } + workspace, err = filepath.Abs(workspace) + if err != nil { + return err + } + 
runRoot, _ = filepath.Abs(runRoot) + return filepath.WalkDir(sourceRoot, func(path string, d os.DirEntry, walkErr error) error { + if walkErr != nil { + return walkErr + } + rel, err := filepath.Rel(sourceRoot, path) + if err != nil { + return err + } + if rel == "." { + return nil + } + if pathWithin(runRoot, path) { + if d.IsDir() { + return filepath.SkipDir + } + return nil + } + parts := splitPath(rel) + if len(parts) == 0 { + return nil + } + if d.IsDir() && r1ClusterSkipSnapshotDir(parts[0], d.Name()) { + return filepath.SkipDir + } + if d.Type()&os.ModeSymlink != 0 { + return nil + } + dst := filepath.Join(workspace, rel) + if d.IsDir() { + return os.MkdirAll(dst, 0o755) + } + if !d.Type().IsRegular() || r1ClusterSkipSnapshotFile(rel, d.Name()) { + return nil + } + info, err := d.Info() + if err != nil { + return err + } + return copyRegularFile(path, dst, info.Mode().Perm()) + }) +} + +func pathWithin(root, path string) bool { + if root == "" { + return false + } + rel, err := filepath.Rel(root, path) + return err == nil && rel != ".." 
&& !strings.HasPrefix(rel, ".."+string(os.PathSeparator)) +} + +func r1ClusterSkipSnapshotDir(first, name string) bool { + switch first { + case ".git", ".testdata", ".mnemon-dev", ".mnemon", ".codex", ".claude", ".openclaw", "node_modules": + return true + } + switch name { + case ".git", ".mnemon", ".codex", ".claude", ".openclaw", "node_modules": + return true + } + return false +} + +func r1ClusterSkipSnapshotFile(rel, name string) bool { + if strings.HasPrefix(rel, ".") { + switch name { + case ".DS_Store": + return true + } + } + switch name { + case "mnemon", "mnemon-harness", "coverage.out": + return true + } + return strings.HasSuffix(name, ".test") || strings.HasSuffix(name, ".tmp") +} + +func applyR1ClusterSeededDefect(agents []r1CodexSyncAgent) (string, error) { + rel := "phase1_seeded_defect_test.go" + body := `package main + +import "testing" + +func TestPhase1SeededRepositoryValidation(t *testing.T) { + t.Fatalf("seeded phase-1 validation defect: replace this failing fixture with a passing assertion") +} +` + for _, agent := range agents { + if err := os.WriteFile(filepath.Join(agent.workspace, rel), []byte(body), 0o644); err != nil { + return rel, fmt.Errorf("%s: write seeded defect: %w", agent.principal, err) + } + } + return rel, nil +} + +func truncateR1Cluster(s string, n int) string { + s = strings.TrimSpace(s) + if len(s) <= n { + return s + } + if n <= 15 { + return s[:n] + } + return s[:n] + "...(truncated)" +} diff --git a/harness/cmd/mnemon-harness/acceptance_cluster_single_entrypoint_test.go b/harness/cmd/mnemon-harness/acceptance_cluster_single_entrypoint_test.go new file mode 100644 index 00000000..bca6a449 --- /dev/null +++ b/harness/cmd/mnemon-harness/acceptance_cluster_single_entrypoint_test.go @@ -0,0 +1,145 @@ +package main + +import ( + "strings" + "testing" +) + +func TestChooseR1ClusterEntrypoint(t *testing.T) { + agents := []r1CodexSyncAgent{ + {r1CodexAgent: r1CodexAgent{principal: "codex-01@project"}}, + {r1CodexAgent: 
r1CodexAgent{principal: "codex-02@project"}}, + {r1CodexAgent: r1CodexAgent{principal: "codex-03@project"}}, + } + idx, err := chooseR1ClusterEntrypoint(agents, "codex-02@project", 1) + if err != nil { + t.Fatalf("explicit entrypoint: %v", err) + } + if idx != 1 { + t.Fatalf("explicit entrypoint index = %d, want 1", idx) + } + first, err := chooseR1ClusterEntrypoint(agents, "", 42) + if err != nil { + t.Fatalf("seeded entrypoint first: %v", err) + } + second, err := chooseR1ClusterEntrypoint(agents, "", 42) + if err != nil { + t.Fatalf("seeded entrypoint second: %v", err) + } + if first != second { + t.Fatalf("seeded entrypoint must be deterministic: %d vs %d", first, second) + } + if _, err := chooseR1ClusterEntrypoint(agents, "missing@project", 1); err == nil { + t.Fatal("missing explicit entrypoint must fail") + } +} + +func TestR1ClusterRunnerContractPrompts(t *testing.T) { + contract := &r1RunnerContractReport{} + recordR1ClusterPrompt(contract, "codex-01@project", "business_task", "do cluster work") + recordR1ClusterPrompt(contract, "codex-02@project", "worker_wake", r1ClusterWorkerWakePrompt) + contract.BusinessTaskPrompts = 1 + contract.WorkerWakePrompts = 1 + if !r1ClusterBusinessBeforeWake(contract) { + t.Fatal("business prompt must be recorded before worker wakes") + } + if !r1ClusterWorkerPromptsGeneric(contract) { + t.Fatal("worker wake prompt must match the generic contract") + } + recordR1ClusterPrompt(contract, "codex-03@project", "worker_wake", "inspect assignment a1") + if r1ClusterWorkerPromptsGeneric(contract) { + t.Fatal("business-shaped worker prompt must violate the generic wake contract") + } +} + +func TestR1ClusterWokeAllNonEntrypoints(t *testing.T) { + agents := []r1CodexAgentReport{ + {Principal: "codex-01@project"}, + {Principal: "codex-02@project"}, + {Principal: "codex-03@project"}, + } + contract := &r1RunnerContractReport{} + recordR1ClusterPrompt(contract, "codex-02@project", "worker_wake", r1ClusterWorkerWakePrompt) + if 
r1ClusterWokeAllNonEntrypoints(contract, agents, "codex-01@project") { + t.Fatal("partial worker wake coverage must not pass") + } + recordR1ClusterPrompt(contract, "codex-03@project", "worker_wake", r1ClusterWorkerWakePrompt) + if !r1ClusterWokeAllNonEntrypoints(contract, agents, "codex-01@project") { + t.Fatal("all non-entrypoint agents should be covered by generic wakes") + } +} + +func TestR1ClusterActorEventCountsAndProgressReady(t *testing.T) { + obs := acceptanceObserveReport{CrossEvents: []acceptanceCrossEvent{ + {Actor: "codex-01@project", EventSubject: "agent_profile/project@1", Status: "accepted"}, + {Actor: "codex-02@project", EventSubject: "agent_profile/project@2", Status: "accepted"}, + {Actor: "codex-03@project", EventSubject: "agent_profile/project@3", Status: "accepted"}, + {Actor: "codex-01@project", EventSubject: "project_intent/project@1", Status: "accepted"}, + {Actor: "codex-01@project", EventSubject: "teamwork_signal/project@1", Status: "accepted"}, + {Actor: "codex-01@project", EventSubject: "assignment/project@1", Status: "accepted"}, + {Actor: "codex-01@project", EventSubject: "assignment/project@2", Status: "accepted"}, + {Actor: "codex-02@project", EventSubject: "progress_digest/project@1", Status: "accepted"}, + {Actor: "codex-03@project", EventSubject: "progress_digest/project@2", Status: "accepted"}, + }} + counts := r1ClusterActorEventCounts(obs) + if !r1ClusterProgressReady(counts, "codex-01@project") { + t.Fatalf("cluster should be progress-ready: %+v", counts) + } + participants := r1ClusterParticipants(counts, "codex-01@project") + if got := len(participants); got != 3 { + t.Fatalf("participants = %d, want 3: %+v", got, participants) + } + workers := r1ClusterWorkerProgressActors(counts, "codex-01@project") + if len(workers) != 2 || workers[0] != "codex-02@project" || workers[1] != "codex-03@project" { + t.Fatalf("worker progress actors wrong: %+v", workers) + } +} + +func TestR1ClusterFindingNoDefectClassification(t *testing.T) 
{ + finding := r1ClusterFindingFromAnswer("No concrete defect found; no code change is justified.", map[string]int{"progress_digest": 2}) + if finding.Kind != "no-defect" { + t.Fatalf("finding kind = %q, want no-defect", finding.Kind) + } + if !finding.Resolved { + t.Fatal("no-code-change finding should be treated as resolved") + } +} + +func TestR1ClusterFindingAppliedFixResolved(t *testing.T) { + finding := r1ClusterFindingFromAnswer("Found an issue and applied the reviewed minimal fix. Validation passed.", map[string]int{"progress_digest": 4}) + if finding.Kind != "issue" { + t.Fatalf("finding kind = %q, want issue", finding.Kind) + } + if !finding.Resolved { + t.Fatal("applied fix should be treated as resolved") + } +} + +func TestR1ClusterAcceptedEventCount(t *testing.T) { + obs := acceptanceObserveReport{CrossEvents: []acceptanceCrossEvent{ + {Status: "accepted"}, + {Status: "rejected"}, + {Status: "accepted"}, + }} + if got := r1ClusterAcceptedEventCount(obs); got != 2 { + t.Fatalf("accepted event count = %d, want 2", got) + } +} + +func TestR1ClusterAcceptanceEnvPinsGitCeiling(t *testing.T) { + runRoot := t.TempDir() + env := acceptanceEnv("/tmp/mnemon-bin", "/tmp/codex-home", runRoot) + if got := testEnvValue(env, "GIT_CEILING_DIRECTORIES"); got != runRoot { + t.Fatalf("GIT_CEILING_DIRECTORIES = %q, want %q", got, runRoot) + } +} + +func testEnvValue(env []string, key string) string { + prefix := key + "=" + for _, item := range env { + if strings.HasPrefix(item, prefix) { + return strings.TrimPrefix(item, prefix) + } + } + return "" +} diff --git a/harness/internal/app/local_runtime.go b/harness/internal/app/local_runtime.go index 3dfb28b2..3f354864 100644 --- a/harness/internal/app/local_runtime.go +++ b/harness/internal/app/local_runtime.go @@ -44,8 +44,9 @@ func OpenLocalRuntime(storePath string, loaded access.LoadedBindings, loops []st // withSyncImport merges the sync-import half into an assembled runtime policy (v1.1 #2): sync@local // gets one 
import rule per importable event package + the skipped-kind deny -// rule, kernel authority for the importable kinds, and a subscription covering the binding scope's -// syncable refs (the import rules read the current resource through this view to merge against). +// rule, kernel authority + SchemaGuard headers for the importable kinds, and a subscription covering +// the binding scope's syncable refs (the import rules read the current resource through this view to +// merge against). // Co-existence is by construction: the added rules Handle only the .remote_synced_event.observed / // sync.* observation types AND gate on the sync principal, so host-agent events never match them and // host rules never see the import events — pinned by a test. catalog selects the importable universe @@ -64,6 +65,16 @@ func withSyncImport(rc runtime.RuntimeConfig, bindings []access.ChannelBinding, rc.Authority.Allow = map[contract.ActorID][]contract.ResourceKind{} } rc.Authority.Allow[contract.SyncImportActor] = policy.ImportableKinds(catalog) + if rc.SchemaGuard.Required == nil { + rc.SchemaGuard = state.DefaultSchemaGuard() + } + for _, cap := range catalog { + if cap.Sync.Importable { + if _, known := rc.SchemaGuard.Required[cap.ResourceKind]; !known { + rc.SchemaGuard.Required[cap.ResourceKind] = cap.RequiredHeader + } + } + } // Inject the produce surface: this replica emits synced events for exactly the kinds its catalog // imports (sync-abi-v2 §4). The app fills the kind slice from the event package registry. 
rc.SyncableKinds = policy.ImportableKinds(catalog) diff --git a/harness/internal/app/sync_worker_test.go b/harness/internal/app/sync_worker_test.go index e8fe550b..8956a35b 100644 --- a/harness/internal/app/sync_worker_test.go +++ b/harness/internal/app/sync_worker_test.go @@ -14,11 +14,18 @@ import ( "github.com/mnemon-dev/mnemon/harness/internal/contract" "github.com/mnemon-dev/mnemon/harness/internal/mnemond/access" + "github.com/mnemon-dev/mnemon/harness/internal/mnemond/policy" "github.com/mnemon-dev/mnemon/harness/internal/mnemond/state" "github.com/mnemon-dev/mnemon/harness/internal/mnemonhub" "github.com/mnemon-dev/mnemon/harness/internal/runtime" ) +const noteImportablePackageSpec = `{"schema_version":1,"name":"note","observed_type":"note.write_candidate.observed", +"proposed_type":"note.write.proposed","resource_kind":"note","items_field":"items", +"fields":[{"name":"text","validators":[{"id":"required","params":{"missing_style":"empty"}},{"id":"safety:unsafe"}]}], +"render":{"content":{"member":"bullet-list","params":{"title":"# Notes","field":"text"}}}, +"sync":{"importable":true,"merge":"item-dedup"}}` + // openServingRuntime boots the PRODUCT serving runtime (OpenLocalRuntime = assembled host policy + // merged sync-import policy) over a standard event host binding — the exact runtime the worker // operates inside `local run`. 
@@ -108,6 +115,22 @@ func foreignProgressMaterial(decisionID, itemID, summary string) contract.Synced } } +func foreignNoteMaterial(decisionID, itemID, text string) contract.SyncedEventMaterial { + fields := map[string]any{ + "content": "# Notes\n- " + text, + "items": []any{map[string]any{ + "id": itemID, "text": text, + "actor": "codex@other", "ingest_seq": float64(8), + }}, + } + return contract.SyncedEventMaterial{ + OriginReplicaID: "other-replica", LocalDecisionID: decisionID, LocalIngestSeq: 8, + Actor: "codex@other", ResourceRef: contract.ResourceRef{Kind: "note", ID: "project"}, + ResourceVersion: 1, FieldsDigest: workerDigest(fields), Fields: fields, + DecidedAt: "2026-06-12T00:00:00Z", Status: "pending", + } +} + // I13 first leg: with NO remotes.json a worker pass is a strict no-op — zero sync activity, zero // errors, the local store untouched. func TestSyncWorkerIdleWithoutRemoteConfig(t *testing.T) { @@ -260,3 +283,36 @@ func TestServingRuntimeMergesSyncImportWithoutDisturbingHostFlow(t *testing.T) { t.Fatalf("host flow must keep working after an import:\n%s", content) } } + +func TestServingRuntimeImportsExternalKindWithoutLocalLoopEnabled(t *testing.T) { + root := t.TempDir() + writeExternalGoalPackage(t, root, "note", noteImportablePackageSpec) + catalog, err := policy.ResolveRegistry(root, state.DefaultSchemaGuard().Required) + if err != nil { + t.Fatalf("resolve catalog: %v", err) + } + progressRef := contract.ResourceRef{Kind: "progress_digest", ID: "project"} + noteRef := contract.ResourceRef{Kind: "note", ID: "project"} + binding := access.HostAgentBinding("codex@project", "http://127.0.0.1:8787", []contract.ResourceRef{progressRef, noteRef}) + binding.AllowedObservedTypes = []string{"progress_digest.write_candidate.observed"} + rt, err := OpenLocalRuntime(filepath.Join(root, runtime.DefaultStorePath), + access.LoadedBindings{Bindings: []access.ChannelBinding{binding}}, + []string{"progress_digest"}, catalog) + if err != nil { + 
t.Fatalf("open serving runtime: %v", err) + } + defer rt.Close() + + if err := importPulledEvents(rt, "hub", testSyncedEvents(t, + foreignNoteMaterial("dec-note", "remote-note", "external note import works"), + ), catalog); err != nil { + t.Fatalf("in-process external import: %v", err) + } + _, fields, err := rt.Resource(noteRef) + if err != nil { + t.Fatalf("read note: %v", err) + } + if content, _ := fields["content"].(string); !strings.Contains(content, "external note import works") { + t.Fatalf("external import missing note content:\n%s", content) + } +} diff --git a/internal/setup/claude.go b/internal/setup/claude.go index 40f24393..fa7e1c39 100644 --- a/internal/setup/claude.go +++ b/internal/setup/claude.go @@ -73,7 +73,7 @@ func ClaudeWriteHook(configDir, filename string, content []byte) (string, error) return "", err } hookPath := filepath.Join(hooksDir, filename) - if err := os.WriteFile(hookPath, content, 0755); err != nil { + if err := writeExecutableFile(hookPath, content); err != nil { return "", err } return hookPath, nil diff --git a/internal/setup/codebuddy.go b/internal/setup/codebuddy.go index b8e0f869..e29f33ab 100644 --- a/internal/setup/codebuddy.go +++ b/internal/setup/codebuddy.go @@ -28,7 +28,7 @@ func CodeBuddyWriteHook(configDir, filename string, content []byte) (string, err return "", err } hookPath := filepath.Join(hooksDir, filename) - if err := os.WriteFile(hookPath, content, 0755); err != nil { + if err := writeExecutableFile(hookPath, content); err != nil { return "", err } return hookPath, nil diff --git a/internal/setup/cursor.go b/internal/setup/cursor.go index ba8b181f..9355b9a5 100644 --- a/internal/setup/cursor.go +++ b/internal/setup/cursor.go @@ -28,7 +28,7 @@ func CursorWriteHook(configDir, filename string, content []byte) (string, error) return "", err } hookPath := filepath.Join(hooksDir, filename) - if err := os.WriteFile(hookPath, content, 0755); err != nil { + if err := writeExecutableFile(hookPath, content); err != 
nil { return "", err } return hookPath, nil diff --git a/internal/setup/files.go b/internal/setup/files.go new file mode 100644 index 00000000..4991a743 --- /dev/null +++ b/internal/setup/files.go @@ -0,0 +1,10 @@ +package setup + +import "os" + +func writeExecutableFile(path string, content []byte) error { + if err := os.WriteFile(path, content, 0o755); err != nil { + return err + } + return os.Chmod(path, 0o755) +} diff --git a/internal/setup/hermes.go b/internal/setup/hermes.go index fbca6f3c..121896e3 100644 --- a/internal/setup/hermes.go +++ b/internal/setup/hermes.go @@ -30,7 +30,7 @@ func HermesWriteHook(configDir, filename string, content []byte) (string, error) return "", err } hookPath := filepath.Join(hooksDir, filename) - if err := os.WriteFile(hookPath, content, 0755); err != nil { + if err := writeExecutableFile(hookPath, content); err != nil { return "", err } return hookPath, nil diff --git a/internal/setup/kimi.go b/internal/setup/kimi.go index 07d2b549..7b44f252 100644 --- a/internal/setup/kimi.go +++ b/internal/setup/kimi.go @@ -29,7 +29,7 @@ func KimiWriteHook(configDir, filename string, content []byte) (string, error) { return "", err } hookPath := filepath.Join(hooksDir, filename) - if err := os.WriteFile(hookPath, content, 0755); err != nil { + if err := writeExecutableFile(hookPath, content); err != nil { return "", err } return hookPath, nil diff --git a/internal/setup/qoder.go b/internal/setup/qoder.go index 52d7035d..75164392 100644 --- a/internal/setup/qoder.go +++ b/internal/setup/qoder.go @@ -37,7 +37,7 @@ func QoderWriteHook(configDir, filename string, content []byte) (string, error) return "", err } hookPath := filepath.Join(hooksDir, filename) - if err := os.WriteFile(hookPath, content, 0755); err != nil { + if err := writeExecutableFile(hookPath, content); err != nil { return "", err } return hookPath, nil diff --git a/internal/setup/trae.go b/internal/setup/trae.go index 5fc74167..95ad535d 100644 --- a/internal/setup/trae.go +++ 
b/internal/setup/trae.go @@ -28,7 +28,7 @@ func TraeWriteHook(configDir, filename string, content []byte) (string, error) { return "", err } hookPath := filepath.Join(hooksDir, filename) - if err := os.WriteFile(hookPath, content, 0755); err != nil { + if err := writeExecutableFile(hookPath, content); err != nil { return "", err } return hookPath, nil diff --git a/internal/setup/workbuddy.go b/internal/setup/workbuddy.go index 25f1d36a..088baa7a 100644 --- a/internal/setup/workbuddy.go +++ b/internal/setup/workbuddy.go @@ -28,7 +28,7 @@ func WorkBuddyWriteHook(configDir, filename string, content []byte) (string, err return "", err } hookPath := filepath.Join(hooksDir, filename) - if err := os.WriteFile(hookPath, content, 0755); err != nil { + if err := writeExecutableFile(hookPath, content); err != nil { return "", err } return hookPath, nil diff --git a/scripts/check_bilingual_sync.sh b/scripts/check_bilingual_sync.sh index fd8f4ca7..bdaf4c18 100755 --- a/scripts/check_bilingual_sync.sh +++ b/scripts/check_bilingual_sync.sh @@ -38,12 +38,14 @@ compare_pair() { failed=1 } } -for en in "${EN_DIR}"/*.md; do compare_pair "${en}"; done for zh in "${ZH_DIR}"/*.md; do base="$(basename "${zh}")" - [[ -f "${EN_DIR}/${base}" || "${base}" == "README.md" ]] || { + en="${EN_DIR}/${base}" + [[ -f "${en}" || "${base}" == "README.md" ]] || { echo "missing English mirror: ${EN_DIR}/${base}" >&2 failed=1 + continue } + [[ -f "${en}" ]] && compare_pair "${en}" done exit "${failed}" diff --git a/scripts/check_eval_router_fixture.sh b/scripts/check_eval_router_fixture.sh index df7a6fcb..0452e5aa 100755 --- a/scripts/check_eval_router_fixture.sh +++ b/scripts/check_eval_router_fixture.sh @@ -6,6 +6,11 @@ RUN_ID="df-rgr-0019-router-fixture-$(date -u +%Y%m%dT%H%M%SZ)" PROPOSAL_RUN_ID="$(printf '%s' "${RUN_ID}" | tr '[:upper:]' '[:lower:]')" PROPOSAL_ID="eval-memory-memory-router-failed-finding-${PROPOSAL_RUN_ID}" +if ! 
go run ./harness/cmd/mnemon-harness eval --help >/dev/null 2>&1; then + echo "eval-router-check: skipped (mnemon-harness eval command unavailable)" + exit 0 +fi + output="$( go run ./harness/cmd/mnemon-harness eval --root "${ROOT}" assert \ --suite router-fixture \