From d9d49f4dd80cf26aae82b37a94d2020f7a77f90b Mon Sep 17 00:00:00 2001
From: czyt <czytcn@gmail.com>
Date: Thu, 16 Apr 2026 23:52:59 +0800
Subject: [PATCH 01/11] chore: trigger release-readiness CI audit


From 34a479a00e32547cb2aa4484864b1eef2c465edc Mon Sep 17 00:00:00 2001
From: czyt <czytcn@gmail.com>
Date: Fri, 17 Apr 2026 14:18:37 +0800
Subject: [PATCH 02/11] Fix browser session cleanup on launch failure

GitHub Actions surfaced a self-deadlock in BrowserSession startup: StartWithOptions held the session mutex while launch() called Stop() on connect failure, and Stop() tried to lock the same mutex again. Extracting stopLocked() lets the launch failure path clean up without re-locking, and the new regression test locks that cleanup behavior in place.\n\nConstraint: Keep BrowserSession startup/cleanup semantics unchanged outside the failure path\nRejected: Remove the browser Goal Runs E2E from CI | would hide a real release blocker instead of fixing it\nConfidence: high\nScope-risk: narrow\nReversibility: clean\nDirective: Any future BrowserSession cleanup called from a locked startup path should use stopLocked-style internal helpers rather than Stop() to avoid lock re-entry\nTested: go test -count=1 ./pkg/tools -run 'TestBrowserSessionStartWithModeAutoCleansUpAfterLaunchFailure|TestBrowserSessionStartWithModeAutoFallsBackToLaunch'; go test -count=1 ./pkg/tools; go test -count=1 ./pkg/webui -run 'TestGoalRunBrowserToolManualFlowE2E|TestManualOnlyGoalRunHTTPFlowCompletesOnFreshProcess|TestGoalRunRecoveryAcrossRealProcessRestart|TestDaemonBackedGoalRunRecoveryAcrossRealProcessRestart'\nNot-tested: Full GitHub Actions rerun not yet observed in this local step
---
 pkg/tools/browser_session.go      | 19 ++++++++++++-------
 pkg/tools/browser_session_test.go | 29 +++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/pkg/tools/browser_session.go b/pkg/tools/browser_session.go
index 424e5f4a..4e48314d 100644
--- a/pkg/tools/browser_session.go
+++ b/pkg/tools/browser_session.go
@@ -217,6 +217,7 @@ func (s *BrowserSession) launch(timeout time.Duration) error {
 
 	if err := s.cmd.Start(); err != nil {
 		cancel()
+		s.cancel = nil
 		return fmt.Errorf("failed to start chrome: %w", err)
 	}
 
@@ -224,7 +225,7 @@ func (s *BrowserSession) launch(timeout time.Duration) error {
 	time.Sleep(2 * time.Second)
 
 	if err := s.connect(9222, timeout); err != nil {
-		if stopErr := s.Stop(); stopErr != nil {
+		if stopErr := s.stopLocked(); stopErr != nil {
 			s.log.Warn("Failed to stop browser session after launch failure", zap.Error(stopErr))
 		}
 		return err
@@ -264,12 +265,8 @@ func (s *BrowserSession) connectEndpoint(endpoint string, timeout time.Duration)
 	return nil
 }
 
-// Stop stops the browser session.
-func (s *BrowserSession) Stop() error {
-	s.mu.Lock()
-	defer s.mu.Unlock()
-
-	if !s.ready {
+func (s *BrowserSession) stopLocked() error {
+	if s.conn == nil && s.cancel == nil && s.cmd == nil && !s.ready {
 		return nil
 	}
 
@@ -292,6 +289,7 @@ func (s *BrowserSession) Stop() error {
 	s.client = nil
 	s.conn = nil
 	s.cmd = nil
+	s.cancel = nil
 	s.debugURL = ""
 	s.mode = BrowserModeAuto
 	s.endpoint = ""
@@ -299,6 +297,13 @@ func (s *BrowserSession) Stop() error {
 	return nil
 }
 
+// Stop stops the browser session.
+func (s *BrowserSession) Stop() error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	return s.stopLocked()
+}
+
 // IsReady returns whether the session is ready.
 func (s *BrowserSession) IsReady() bool {
 	s.mu.RLock()
diff --git a/pkg/tools/browser_session_test.go b/pkg/tools/browser_session_test.go
index f72e1502..8a3de27e 100644
--- a/pkg/tools/browser_session_test.go
+++ b/pkg/tools/browser_session_test.go
@@ -2,6 +2,7 @@ package tools
 
 import (
 	"context"
+	"os/exec"
 	"errors"
 	"testing"
 	"time"
@@ -308,3 +309,31 @@ func TestBrowserSessionGetDevToolsReturnsFactoryForEndpoint(t *testing.T) {
 		t.Fatalf("expected devtools factory to be called once, got %d", called)
 	}
 }
+
+func TestBrowserSessionStartWithModeAutoCleansUpAfterLaunchFailure(t *testing.T) {
+	session := &BrowserSession{timeout: 5 * time.Second, log: newToolsTestLogger(t)}
+	session.connectFn = func(port int, timeout time.Duration) error {
+		return errors.New("not running")
+	}
+	cmd := exec.Command("bash", "-lc", "sleep 30")
+	session.launchFn = func(timeout time.Duration) error {
+		_, cancel := context.WithTimeout(context.Background(), timeout)
+		session.cancel = cancel
+		session.cmd = cmd
+		if err := session.cmd.Start(); err != nil {
+			return err
+		}
+		if err := session.stopLocked(); err != nil {
+			return err
+		}
+		return errors.New("failed to connect after launch")
+	}
+
+	err := session.StartWithMode(2*time.Second, BrowserModeAuto)
+	if err == nil {
+		t.Fatal("expected launch failure")
+	}
+	if session.cmd != nil || session.cancel != nil || session.conn != nil || session.client != nil || session.ready {
+		t.Fatalf("expected failed launch cleanup, got %+v", session.Status())
+	}
+}

From 3ed9516239243c806d3a8b258de56693af5e1989 Mon Sep 17 00:00:00 2001
From: czyt <czytcn@gmail.com>
Date: Fri, 17 Apr 2026 14:23:41 +0800
Subject: [PATCH 03/11] Harden browser session cleanup for CI startup failures

GitHub Actions exposed a browser-session startup deadlock in the Goal Run browser E2E. The fix keeps cleanup on the locked failure path internal via stopLocked(), and also clears stale cancel state when Chrome discovery fails before launch. Two focused regressions now lock both failure modes.\n\nConstraint: Preserve normal browser-session startup behavior while fixing only failure-path cleanup\nRejected: Remove the browser E2E from CI | that would hide a real release blocker instead of resolving it\nConfidence: high\nScope-risk: narrow\nReversibility: clean\nDirective: Any BrowserSession failure path entered while StartWithOptions holds the mutex must avoid calling lock-taking public cleanup methods\nTested: go test -count=1 ./pkg/tools -run 'TestBrowserSessionStartWithModeAutoCleansUpAfterLaunchFailure|TestBrowserSessionLaunchWithoutChromeCleansUpCancel|TestBrowserSessionStartWithModeAutoFallsBackToLaunch'; go test -count=1 ./pkg/tools; go test -count=1 ./pkg/webui -run 'TestGoalRunBrowserToolManualFlowE2E|TestManualOnlyGoalRunHTTPFlowCompletesOnFreshProcess|TestGoalRunRecoveryAcrossRealProcessRestart|TestDaemonBackedGoalRunRecoveryAcrossRealProcessRestart'\nNot-tested: Post-fix GitHub Actions run not yet observed at commit time
---
 pkg/tools/browser_session.go      |  2 ++
 pkg/tools/browser_session_test.go | 17 +++++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/pkg/tools/browser_session.go b/pkg/tools/browser_session.go
index 4e48314d..af3e2077 100644
--- a/pkg/tools/browser_session.go
+++ b/pkg/tools/browser_session.go
@@ -192,6 +192,8 @@ func (s *BrowserSession) launch(timeout time.Duration) error {
 	// Try to find Chrome executable
 	chromePath := findChrome()
 	if chromePath == "" {
+		cancel()
+		s.cancel = nil
 		return fmt.Errorf("chrome not found in PATH")
 	}
 
diff --git a/pkg/tools/browser_session_test.go b/pkg/tools/browser_session_test.go
index 8a3de27e..4bdf3800 100644
--- a/pkg/tools/browser_session_test.go
+++ b/pkg/tools/browser_session_test.go
@@ -2,6 +2,7 @@ package tools
 
 import (
 	"context"
+	"strings"
 	"os/exec"
 	"errors"
 	"testing"
@@ -337,3 +338,19 @@ func TestBrowserSessionStartWithModeAutoCleansUpAfterLaunchFailure(t *testing.T)
 		t.Fatalf("expected failed launch cleanup, got %+v", session.Status())
 	}
 }
+
+func TestBrowserSessionLaunchWithoutChromeCleansUpCancel(t *testing.T) {
+	session := &BrowserSession{timeout: 5 * time.Second, log: newToolsTestLogger(t)}
+
+	err := session.launch(2 * time.Second)
+	if err == nil {
+		t.Fatal("expected chrome-not-found failure")
+	}
+	if !strings.Contains(err.Error(), "chrome not found in PATH") {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if session.cancel != nil || session.cmd != nil || session.conn != nil || session.client != nil || session.ready {
+		t.Fatalf("expected launch failure without chrome to leave no session state, got ready=%v cmd=%v cancel=%v conn=%v client=%v",
+			session.ready, session.cmd, session.cancel, session.conn, session.client)
+	}
+}

From 35aff795e4ded3297a57a72323a6f635c9436aa1 Mon Sep 17 00:00:00 2001
From: czyt <czytcn@gmail.com>
Date: Fri, 17 Apr 2026 14:28:42 +0800
Subject: [PATCH 04/11] Inject browser discovery for stable startup cleanup
 tests

The CI blocker fix needed one more hardening step: the no-Chrome cleanup regression must not depend on host environment. BrowserSession now accepts an internal findChromeFn hook, and the test injects an empty result so the chrome-not-found cleanup path is validated deterministically.\n\nConstraint: Keep the change internal to BrowserSession and avoid changing public browser tool behavior\nRejected: Rely on CI/local machine Chrome availability to test the no-Chrome path | environment-dependent and flaky\nConfidence: high\nScope-risk: narrow\nReversibility: clean\nDirective: When startup paths need environment-specific branches tested, inject the discovery primitive instead of depending on machine state\nTested: go test -count=1 ./pkg/tools -run 'TestBrowserSessionStartWithModeAutoCleansUpAfterLaunchFailure|TestBrowserSessionLaunchWithoutChromeCleansUpCancel|TestBrowserSessionStartWithModeAutoFallsBackToLaunch'; go test -count=1 ./pkg/tools; go test -count=1 ./pkg/webui -run 'TestGoalRunBrowserToolManualFlowE2E|TestManualOnlyGoalRunHTTPFlowCompletesOnFreshProcess|TestGoalRunRecoveryAcrossRealProcessRestart|TestDaemonBackedGoalRunRecoveryAcrossRealProcessRestart'\nNot-tested: post-fix GitHub Actions rerun not yet observed at commit time
---
 pkg/tools/browser_session.go      | 7 ++++++-
 pkg/tools/browser_session_test.go | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/pkg/tools/browser_session.go b/pkg/tools/browser_session.go
index af3e2077..18b6dd77 100644
--- a/pkg/tools/browser_session.go
+++ b/pkg/tools/browser_session.go
@@ -46,6 +46,7 @@ type BrowserSession struct {
 	connectFn         func(port int, timeout time.Duration) error
 	connectEndpointFn func(endpoint string, timeout time.Duration) error
 	launchFn          func(timeout time.Duration) error
+	findChromeFn      func() string
 	devtoolsFactory   func(endpoint string) browserDevTools
 }
 
@@ -95,6 +96,7 @@ func GetBrowserSession(log *logger.Logger) *BrowserSession {
 		browserSession.connectFn = browserSession.connect
 		browserSession.connectEndpointFn = browserSession.connectEndpoint
 		browserSession.launchFn = browserSession.launch
+		browserSession.findChromeFn = findChrome
 		browserSession.devtoolsFactory = func(endpoint string) browserDevTools {
 			return devtool.New(endpoint)
 		}
@@ -136,6 +138,9 @@ func (s *BrowserSession) StartWithOptions(timeout time.Duration, opts BrowserSta
 	if s.launchFn == nil {
 		s.launchFn = s.launch
 	}
+	if s.findChromeFn == nil {
+		s.findChromeFn = findChrome
+	}
 	if s.devtoolsFactory == nil {
 		s.devtoolsFactory = func(endpoint string) browserDevTools {
 			return devtool.New(endpoint)
@@ -190,7 +195,7 @@ func (s *BrowserSession) launch(timeout time.Duration) error {
 	s.cancel = cancel
 
 	// Try to find Chrome executable
-	chromePath := findChrome()
+	chromePath := s.findChromeFn()
 	if chromePath == "" {
 		cancel()
 		s.cancel = nil
diff --git a/pkg/tools/browser_session_test.go b/pkg/tools/browser_session_test.go
index 4bdf3800..17fba043 100644
--- a/pkg/tools/browser_session_test.go
+++ b/pkg/tools/browser_session_test.go
@@ -341,6 +341,7 @@ func TestBrowserSessionStartWithModeAutoCleansUpAfterLaunchFailure(t *testing.T)
 
 func TestBrowserSessionLaunchWithoutChromeCleansUpCancel(t *testing.T) {
 	session := &BrowserSession{timeout: 5 * time.Second, log: newToolsTestLogger(t)}
+	session.findChromeFn = func() string { return "" }
 
 	err := session.launch(2 * time.Second)
 	if err == nil {

From 9980aa9a8815b0fa0322b6541f0e807f9e73e7ad Mon Sep 17 00:00:00 2001
From: czyt <czytcn@gmail.com>
Date: Fri, 17 Apr 2026 14:43:58 +0800
Subject: [PATCH 05/11] Wait for browser debug endpoint before Goal Run E2E

The CI browser regression was still failing because Chrome startup only slept for a fixed two seconds and then attempted a single DevTools attach. On GitHub runners that proved too early, producing transient 'could not resolve IP for 127.0.0.1' failures. BrowserSession now retries the debug attach until the timeout budget expires, and the retry helper is covered by a focused regression.\n\nConstraint: Preserve local fast path while making CI browser startup resilient to slower runner readiness\nRejected: Increase fixed sleep to an arbitrary larger constant | slower and still flaky compared with bounded retry\nConfidence: high\nScope-risk: narrow\nReversibility: clean\nDirective: Any future browser startup readiness check should prefer bounded retries over fixed sleeps when coordinating with external processes\nTested: go test -count=1 ./pkg/tools -run 'TestBrowserSessionConnectWithRetryEventuallySucceeds|TestBrowserSessionStartWithModeAutoCleansUpAfterLaunchFailure|TestBrowserSessionLaunchWithoutChromeCleansUpCancel|TestBrowserSessionStartWithModeAutoFallsBackToLaunch'; go test -count=1 ./pkg/webui -run 'TestGoalRunBrowserToolManualFlowE2E|TestManualOnlyGoalRunHTTPFlowCompletesOnFreshProcess'\nNot-tested: Latest GitHub Actions rerun not yet observed at commit time
---
 pkg/tools/browser_session.go      | 27 +++++++++++++++++++++++----
 pkg/tools/browser_session_test.go | 21 +++++++++++++++++++++
 2 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/pkg/tools/browser_session.go b/pkg/tools/browser_session.go
index 18b6dd77..99b5bc6a 100644
--- a/pkg/tools/browser_session.go
+++ b/pkg/tools/browser_session.go
@@ -228,10 +228,7 @@ func (s *BrowserSession) launch(timeout time.Duration) error {
 		return fmt.Errorf("failed to start chrome: %w", err)
 	}
 
-	// Wait for Chrome to be ready
-	time.Sleep(2 * time.Second)
-
-	if err := s.connect(9222, timeout); err != nil {
+	if err := s.connectWithRetry(9222, timeout); err != nil {
 		if stopErr := s.stopLocked(); stopErr != nil {
 			s.log.Warn("Failed to stop browser session after launch failure", zap.Error(stopErr))
 		}
@@ -245,6 +242,28 @@ func (s *BrowserSession) launch(timeout time.Duration) error {
 	return nil
 }
 
+func (s *BrowserSession) connectWithRetry(port int, timeout time.Duration) error {
+	if timeout <= 0 {
+		timeout = s.timeout
+	}
+	connectFn := s.connectFn
+	if connectFn == nil {
+		connectFn = s.connect
+	}
+	deadline := time.Now().Add(timeout)
+	var lastErr error
+	for {
+		lastErr = connectFn(port, timeout)
+		if lastErr == nil {
+			return nil
+		}
+		if time.Now().After(deadline) {
+			return lastErr
+		}
+		time.Sleep(250 * time.Millisecond)
+	}
+}
+
 func (s *BrowserSession) connect(port int, timeout time.Duration) error {
 	return s.connectEndpoint(fmt.Sprintf("http://127.0.0.1:%d", port), timeout)
 }
diff --git a/pkg/tools/browser_session_test.go b/pkg/tools/browser_session_test.go
index 17fba043..31dcb1b6 100644
--- a/pkg/tools/browser_session_test.go
+++ b/pkg/tools/browser_session_test.go
@@ -339,6 +339,27 @@ func TestBrowserSessionStartWithModeAutoCleansUpAfterLaunchFailure(t *testing.T)
 	}
 }
 
+func TestBrowserSessionConnectWithRetryEventuallySucceeds(t *testing.T) {
+	session := &BrowserSession{timeout: 2 * time.Second, log: newToolsTestLogger(t)}
+	attempts := 0
+	session.connectFn = func(port int, timeout time.Duration) error {
+		attempts++
+		if attempts < 3 {
+			return errors.New("not ready")
+		}
+		session.ready = true
+		return nil
+	}
+
+	err := session.connectWithRetry(9222, 500*time.Millisecond)
+	if err != nil {
+		t.Fatalf("connectWithRetry returned error: %v", err)
+	}
+	if attempts != 3 {
+		t.Fatalf("expected 3 attempts before success, got %d", attempts)
+	}
+}
+
 func TestBrowserSessionLaunchWithoutChromeCleansUpCancel(t *testing.T) {
 	session := &BrowserSession{timeout: 5 * time.Second, log: newToolsTestLogger(t)}
 	session.findChromeFn = func() string { return "" }

From bc2424e1af33ba4dcfe42dfcaf2d7a323c2f01e9 Mon Sep 17 00:00:00 2001
From: czyt <czytcn@gmail.com>
Date: Fri, 17 Apr 2026 21:03:07 +0800
Subject: [PATCH 06/11] Subscribe before browser navigation waits

The browser Goal Run E2E was still flaky in GitHub Actions because navigate() subscribed to DOMContentLoaded after Page.Navigate, which allowed fast page loads to outrun the listener and hang the wait. Navigation now subscribes before Page.Navigate and uses a bounded wait context when receiving the DOMContentLoaded event.\n\nConstraint: Keep navigation semantics unchanged while eliminating CI timing races\nRejected: Longer fixed sleeps in the E2E flow | slower and less reliable than fixing the event ordering in the tool\nConfidence: high\nScope-risk: narrow\nReversibility: clean\nDirective: Any future page-load wait in BrowserTool should subscribe before triggering navigation to avoid post-event listeners hanging indefinitely\nTested: go test -count=1 ./pkg/tools -run 'TestBrowserSessionConnectWithRetryEventuallySucceeds|TestBrowserSessionStartWithModeAutoCleansUpAfterLaunchFailure|TestBrowserSessionLaunchWithoutChromeCleansUpCancel|TestBrowserSessionStartWithModeAutoFallsBackToLaunch'; go test -count=1 ./pkg/webui -run 'TestGoalRunBrowserToolManualFlowE2E|TestManualOnlyGoalRunHTTPFlowCompletesOnFreshProcess'\nNot-tested: Latest GitHub Actions rerun not yet observed at commit time
---
 pkg/tools/browser.go | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/pkg/tools/browser.go b/pkg/tools/browser.go
index 4a092320..6ffc1e46 100644
--- a/pkg/tools/browser.go
+++ b/pkg/tools/browser.go
@@ -400,21 +400,32 @@ func (b *BrowserTool) navigate(ctx context.Context, params map[string]interface{
 		return "", fmt.Errorf("failed to get client: %w", err)
 	}
 
+	// Subscribe before navigation so a fast page load cannot race past the listener.
+	domLoaded, domErr := client.Page.DOMContentEventFired(ctx)
+	if domErr != nil {
+		b.log.Warn("Failed to subscribe to DOMContentLoaded",
+			zap.Error(domErr))
+	}
+
 	nav, err := client.Page.Navigate(ctx, page.NewNavigateArgs(urlStr))
 	if err != nil {
+		if domLoaded != nil {
+			_ = domLoaded.Close()
+		}
 		return "", fmt.Errorf("failed to navigate: %w", err)
 	}
 
 	// Wait for page load
-	domLoaded, err := client.Page.DOMContentEventFired(ctx)
-	if err != nil {
-		b.log.Warn("Failed to wait for DOMContentLoaded",
-			zap.Error(err))
-	} else {
+	if domLoaded != nil {
 		defer func() {
 			_ = domLoaded.Close()
 		}()
-		_, _ = domLoaded.Recv()
+		waitCtx, cancel := context.WithTimeout(ctx, b.timeout)
+		defer cancel()
+		if err := domLoaded.RecvMsg(waitCtx); err != nil {
+			b.log.Warn("Failed to wait for DOMContentLoaded",
+				zap.Error(err))
+		}
 	}
 
 	return fmt.Sprintf("Navigated to: %s\nFrame ID: %s", urlStr, nav.FrameID), nil

From e2ef9198a64618ac52915a0370d9afaecb4565c1 Mon Sep 17 00:00:00 2001
From: czyt <czytcn@gmail.com>
Date: Fri, 17 Apr 2026 22:04:17 +0800
Subject: [PATCH 07/11] Poll readyState after browser navigation

The CI browser regression was still hanging inside BrowserTool.navigate because waiting on DOMContentLoaded via the CDP event stream remained fragile on GitHub runners. The navigation path now uses a bounded readyState poll after Page.Navigate, which is more resilient to event timing races while preserving the existing behavior contract.\n\nConstraint: Keep the browser tool behavior stable for existing callers while removing CI-specific page-load hangs\nRejected: Keep relying on DOMContentLoaded event subscription | still flaky under fast/slow runner timing differences\nConfidence: high\nScope-risk: narrow\nReversibility: clean\nDirective: Prefer bounded state polling over event-stream waits for page-load completion when the event ordering is not guaranteed across environments\nTested: go test -count=1 ./pkg/tools; go test -count=1 ./pkg/webui -run 'TestGoalRunBrowserToolManualFlowE2E|TestManualOnlyGoalRunHTTPFlowCompletesOnFreshProcess'\nNot-tested: Latest GitHub Actions rerun not yet observed at commit time
---
 pkg/tools/browser.go | 57 ++++++++++++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 21 deletions(-)

diff --git a/pkg/tools/browser.go b/pkg/tools/browser.go
index 6ffc1e46..a773ea87 100644
--- a/pkg/tools/browser.go
+++ b/pkg/tools/browser.go
@@ -11,6 +11,7 @@ import (
 	"strings"
 	"time"
 
+	"github.com/mafredri/cdp"
 	"github.com/mafredri/cdp/devtool"
 	"github.com/mafredri/cdp/protocol/dom"
 	"github.com/mafredri/cdp/protocol/emulation"
@@ -400,37 +401,51 @@ func (b *BrowserTool) navigate(ctx context.Context, params map[string]interface{
 		return "", fmt.Errorf("failed to get client: %w", err)
 	}
 
-	// Subscribe before navigation so a fast page load cannot race past the listener.
-	domLoaded, domErr := client.Page.DOMContentEventFired(ctx)
-	if domErr != nil {
-		b.log.Warn("Failed to subscribe to DOMContentLoaded",
-			zap.Error(domErr))
-	}
-
 	nav, err := client.Page.Navigate(ctx, page.NewNavigateArgs(urlStr))
 	if err != nil {
-		if domLoaded != nil {
-			_ = domLoaded.Close()
-		}
 		return "", fmt.Errorf("failed to navigate: %w", err)
 	}
 
-	// Wait for page load
-	if domLoaded != nil {
-		defer func() {
-			_ = domLoaded.Close()
-		}()
-		waitCtx, cancel := context.WithTimeout(ctx, b.timeout)
-		defer cancel()
-		if err := domLoaded.RecvMsg(waitCtx); err != nil {
-			b.log.Warn("Failed to wait for DOMContentLoaded",
-				zap.Error(err))
-		}
+	// Wait for page load using a bounded readyState poll rather than a CDP event
+	// stream. This avoids CI flakes where the event listener can hang indefinitely.
+	if err := waitForPageReadyState(ctx, client, b.timeout); err != nil {
+		b.log.Warn("Failed to wait for readyState after navigation",
+			zap.Error(err))
 	}
 
 	return fmt.Sprintf("Navigated to: %s\nFrame ID: %s", urlStr, nav.FrameID), nil
 }
 
+func waitForPageReadyState(parent context.Context, client *cdp.Client, timeout time.Duration) error {
+	if timeout <= 0 {
+		timeout = 30 * time.Second
+	}
+	waitCtx, cancel := context.WithTimeout(parent, timeout)
+	defer cancel()
+
+	for {
+		evalArgs := runtime.NewEvaluateArgs("document.readyState").SetReturnByValue(true)
+		result, err := client.Runtime.Evaluate(waitCtx, evalArgs)
+		if err == nil && result.ExceptionDetails == nil {
+			var state string
+			if unmarshalErr := json.Unmarshal(result.Result.Value, &state); unmarshalErr == nil {
+				if state == "interactive" || state == "complete" {
+					return nil
+				}
+			}
+		}
+
+		select {
+		case <-waitCtx.Done():
+			if err != nil {
+				return err
+			}
+			return waitCtx.Err()
+		case <-time.After(100 * time.Millisecond):
+		}
+	}
+}
+
 func (b *BrowserTool) startMode(params map[string]interface{}) (BrowserConnectionMode, error) {
 	opts, err := b.startOptions(params)
 	if err != nil {

From f1d1f752778503edee896914fd43d0196671a00a Mon Sep 17 00:00:00 2001
From: czyt <czytcn@gmail.com>
Date: Fri, 17 Apr 2026 22:12:08 +0800
Subject: [PATCH 08/11] Stabilize Goal Run browser E2E input updates

The browser regression in CI had advanced from startup hangs to stale DOM node lookups when BrowserTool used click+type on the create dialog inputs. The test now uses execute_script to set input and textarea values directly and dispatch input events, which avoids stale-node timing races while still exercising the browser-visible Goal Run flow.\n\nConstraint: Keep the test browser-level without broadening BrowserTool just for one workflow\nRejected: Add more retries around low-level click/type in the test | still vulnerable to stale node handles on dynamic dialog DOM\nConfidence: high\nScope-risk: narrow\nReversibility: clean\nDirective: For dynamic form UIs in browser-tool E2E tests, prefer direct value+input-event updates over low-level focus/click typing when DOM replacement is possible\nTested: go test -count=1 ./pkg/webui -run 'TestGoalRunBrowserToolManualFlowE2E|TestManualOnlyGoalRunHTTPFlowCompletesOnFreshProcess'\nNot-tested: Latest GitHub Actions rerun not yet observed at commit time
---
 pkg/webui/server_goalruns_test.go | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/pkg/webui/server_goalruns_test.go b/pkg/webui/server_goalruns_test.go
index 73e8d2bf..8355876e 100644
--- a/pkg/webui/server_goalruns_test.go
+++ b/pkg/webui/server_goalruns_test.go
@@ -647,17 +647,14 @@ func TestGoalRunBrowserToolManualFlowE2E(t *testing.T) {
 
 	steps := []map[string]interface{}{
 		{"action": "execute_script", "script": `Array.from(document.querySelectorAll('button')).find(btn => btn.textContent?.includes('New goal run'))?.click(); true;`},
-		{"action": "type", "selector": "#goal-run-name", "text": "Browser tool e2e"},
-		{"action": "type", "selector": "#goal-run-goal", "text": "Verify browser-tool Goal Runs flow"},
-		{"action": "type", "selector": "#goal-run-criteria", "text": "confirm manually"},
+		{"action": "execute_script", "script": `(function(){ const set = (selector, value) => { const el = document.querySelector(selector); if (!el) throw new Error('missing '+selector); el.value = value; el.dispatchEvent(new Event('input', { bubbles: true })); }; set('#goal-run-name', 'Browser tool e2e'); set('#goal-run-goal', 'Verify browser-tool Goal Runs flow'); set('#goal-run-criteria', 'confirm manually'); return true; })();`},
 		{"action": "execute_script", "script": `Array.from(document.querySelectorAll('button')).find(btn => btn.textContent?.includes('Create run'))?.click(); true;`},
 		{"action": "wait", "duration": float64(1200)},
 		{"action": "execute_script", "script": `Array.from(document.querySelectorAll('button')).find(btn => btn.textContent?.includes('Confirm criteria'))?.click(); true;`},
 		{"action": "wait", "duration": float64(600)},
 		{"action": "execute_script", "script": `Array.from(document.querySelectorAll('button')).find(btn => btn.textContent?.includes('Start run'))?.click(); true;`},
 		{"action": "wait", "duration": float64(1200)},
-		{"action": "execute_script", "script": `const areas = Array.from(document.querySelectorAll('textarea')); if (areas.length > 0) { areas[areas.length - 1].focus(); } true;`},
-		{"action": "type", "selector": "textarea", "text": "browser tool e2e confirmed"},
+		{"action": "execute_script", "script": `(function(){ const areas = Array.from(document.querySelectorAll('textarea')); if (areas.length === 0) throw new Error('missing textarea'); const el = areas[areas.length - 1]; el.value = 'browser tool e2e confirmed'; el.dispatchEvent(new Event('input', { bubbles: true })); return true; })();`},
 		{"action": "execute_script", "script": `Array.from(document.querySelectorAll('button')).find(btn => btn.textContent?.includes('Approve criterion'))?.click(); true;`},
 		{"action": "wait", "duration": float64(1200)},
 	}

From 73de8a6abd9e76bfeef42af2b7fe50006f6ba9f8 Mon Sep 17 00:00:00 2001
From: czyt <czytcn@gmail.com>
Date: Fri, 17 Apr 2026 22:24:36 +0800
Subject: [PATCH 09/11] Wait for Goal Run dialog fields before browser E2E
 input

The CI browser regression had narrowed to the create dialog render timing: the one-shot script assumed all three form fields were already present and could throw an uncaught error on slower runners. The test now waits briefly after opening the dialog and sets each field with its own guarded script, which keeps the browser-level flow stable without widening the production surface.\n\nConstraint: Keep the browser regression focused on user-visible flow, not on micro-timing details of the dialog renderer\nRejected: Add larger arbitrary sleeps everywhere in the flow | slower and less disciplined than targeting the dialog render gap directly\nConfidence: high\nScope-risk: narrow\nReversibility: clean\nDirective: For browser-tool E2E over dialog-driven UIs, treat dialog field availability as asynchronous and guard each critical selector independently\nTested: go test -count=1 ./pkg/webui -run 'TestGoalRunBrowserToolManualFlowE2E|TestManualOnlyGoalRunHTTPFlowCompletesOnFreshProcess'\nNot-tested: Latest GitHub Actions rerun not yet observed at commit time
---
 pkg/webui/server_goalruns_test.go | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pkg/webui/server_goalruns_test.go b/pkg/webui/server_goalruns_test.go
index 8355876e..d6e8009a 100644
--- a/pkg/webui/server_goalruns_test.go
+++ b/pkg/webui/server_goalruns_test.go
@@ -647,7 +647,10 @@ func TestGoalRunBrowserToolManualFlowE2E(t *testing.T) {
 
 	steps := []map[string]interface{}{
 		{"action": "execute_script", "script": `Array.from(document.querySelectorAll('button')).find(btn => btn.textContent?.includes('New goal run'))?.click(); true;`},
-		{"action": "execute_script", "script": `(function(){ const set = (selector, value) => { const el = document.querySelector(selector); if (!el) throw new Error('missing '+selector); el.value = value; el.dispatchEvent(new Event('input', { bubbles: true })); }; set('#goal-run-name', 'Browser tool e2e'); set('#goal-run-goal', 'Verify browser-tool Goal Runs flow'); set('#goal-run-criteria', 'confirm manually'); return true; })();`},
+		{"action": "wait", "duration": float64(300)},
+		{"action": "execute_script", "script": `(function(){ const el = document.querySelector('#goal-run-name'); if (!el) throw new Error('missing #goal-run-name'); el.value = 'Browser tool e2e'; el.dispatchEvent(new Event('input', { bubbles: true })); return true; })();`},
+		{"action": "execute_script", "script": `(function(){ const el = document.querySelector('#goal-run-goal'); if (!el) throw new Error('missing #goal-run-goal'); el.value = 'Verify browser-tool Goal Runs flow'; el.dispatchEvent(new Event('input', { bubbles: true })); return true; })();`},
+		{"action": "execute_script", "script": `(function(){ const el = document.querySelector('#goal-run-criteria'); if (!el) throw new Error('missing #goal-run-criteria'); el.value = 'confirm manually'; el.dispatchEvent(new Event('input', { bubbles: true })); return true; })();`},
 		{"action": "execute_script", "script": `Array.from(document.querySelectorAll('button')).find(btn => btn.textContent?.includes('Create run'))?.click(); true;`},
 		{"action": "wait", "duration": float64(1200)},
 		{"action": "execute_script", "script": `Array.from(document.querySelectorAll('button')).find(btn => btn.textContent?.includes('Confirm criteria'))?.click(); true;`},

From 9a213ad6e36bd40b998f45ca4b9f8448fde76568 Mon Sep 17 00:00:00 2001
From: czyt <czytcn@gmail.com>
Date: Fri, 17 Apr 2026 22:43:07 +0800
Subject: [PATCH 10/11] Wait explicitly for Goal Run dialog selectors in
 browser E2E

The latest CI failure showed the create dialog inputs can still lag behind the click that opens the Goal Run form. The browser E2E now waits asynchronously for all required selectors to appear before trying to set values, which keeps the test aligned with the actual UI lifecycle instead of assuming a fixed render delay.\n\nConstraint: Keep the browser regression user-flow oriented while tolerating slower CI dialog rendering\nRejected: Increase the fixed post-click sleep again | less precise and still prone to timing races\nConfidence: high\nScope-risk: narrow\nReversibility: clean\nDirective: When browser E2E depends on dynamic dialog fields, wait for the concrete selectors rather than relying on generic delays\nTested: go test -count=1 ./pkg/webui -run 'TestGoalRunBrowserToolManualFlowE2E|TestManualOnlyGoalRunHTTPFlowCompletesOnFreshProcess'\nNot-tested: Latest GitHub Actions rerun not yet observed at commit time
---
 pkg/webui/server_goalruns_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/webui/server_goalruns_test.go b/pkg/webui/server_goalruns_test.go
index d6e8009a..82202b21 100644
--- a/pkg/webui/server_goalruns_test.go
+++ b/pkg/webui/server_goalruns_test.go
@@ -647,7 +647,7 @@ func TestGoalRunBrowserToolManualFlowE2E(t *testing.T) {
 
 	steps := []map[string]interface{}{
 		{"action": "execute_script", "script": `Array.from(document.querySelectorAll('button')).find(btn => btn.textContent?.includes('New goal run'))?.click(); true;`},
-		{"action": "wait", "duration": float64(300)},
+		{"action": "execute_script", "script": `(async function waitForDialogFields(deadline){ while (Date.now() < deadline) { if (document.querySelector('#goal-run-name') && document.querySelector('#goal-run-goal') && document.querySelector('#goal-run-criteria')) { return true; } await new Promise(resolve => setTimeout(resolve, 50)); } throw new Error('goal run dialog fields did not render in time'); })(Date.now() + 2000);`},
 		{"action": "execute_script", "script": `(function(){ const el = document.querySelector('#goal-run-name'); if (!el) throw new Error('missing #goal-run-name'); el.value = 'Browser tool e2e'; el.dispatchEvent(new Event('input', { bubbles: true })); return true; })();`},
 		{"action": "execute_script", "script": `(function(){ const el = document.querySelector('#goal-run-goal'); if (!el) throw new Error('missing #goal-run-goal'); el.value = 'Verify browser-tool Goal Runs flow'; el.dispatchEvent(new Event('input', { bubbles: true })); return true; })();`},
 		{"action": "execute_script", "script": `(function(){ const el = document.querySelector('#goal-run-criteria'); if (!el) throw new Error('missing #goal-run-criteria'); el.value = 'confirm manually'; el.dispatchEvent(new Event('input', { bubbles: true })); return true; })();`},

From b7207dce9867d375c2b3ab7a801b8906c53a97e3 Mon Sep 17 00:00:00 2001
From: czyt <czytcn@gmail.com>
Date: Fri, 17 Apr 2026 22:56:05 +0800
Subject: [PATCH 11/11] Wait for Goal Run dialog before field hydration in
 browser E2E
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The CI browser regression showed that even when the create dialog button was clicked, the dialog container and its fields did not become available on the same schedule. The test now waits first for the dialog shell, then waits for each required field during hydration, which matches the UI lifecycle more closely and reduces timing-related false failures.\n\nConstraint: Keep the browser regression aligned with actual UI readiness states instead of assuming synchronous dialog hydration\nRejected: Keep only a single shared field wait with a short deadline | still too sensitive to staggered dialog rendering in CI\nConfidence: high\nScope-risk: narrow\nReversibility: clean\nDirective: For browser E2E on modal UIs, separate “dialog is open” readiness from “all fields are hydrated” readiness when CI rendering order matters\nTested: go test -count=1 ./pkg/webui -run 'TestGoalRunBrowserToolManualFlowE2E|TestManualOnlyGoalRunHTTPFlowCompletesOnFreshProcess'\nNot-tested: Latest GitHub Actions rerun not yet observed at commit time
---
 pkg/webui/server_goalruns_test.go | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pkg/webui/server_goalruns_test.go b/pkg/webui/server_goalruns_test.go
index 82202b21..e73109f2 100644
--- a/pkg/webui/server_goalruns_test.go
+++ b/pkg/webui/server_goalruns_test.go
@@ -647,10 +647,8 @@ func TestGoalRunBrowserToolManualFlowE2E(t *testing.T) {
 
 	steps := []map[string]interface{}{
 		{"action": "execute_script", "script": `Array.from(document.querySelectorAll('button')).find(btn => btn.textContent?.includes('New goal run'))?.click(); true;`},
-		{"action": "execute_script", "script": `(async function waitForDialogFields(deadline){ while (Date.now() < deadline) { if (document.querySelector('#goal-run-name') && document.querySelector('#goal-run-goal') && document.querySelector('#goal-run-criteria')) { return true; } await new Promise(resolve => setTimeout(resolve, 50)); } throw new Error('goal run dialog fields did not render in time'); })(Date.now() + 2000);`},
-		{"action": "execute_script", "script": `(function(){ const el = document.querySelector('#goal-run-name'); if (!el) throw new Error('missing #goal-run-name'); el.value = 'Browser tool e2e'; el.dispatchEvent(new Event('input', { bubbles: true })); return true; })();`},
-		{"action": "execute_script", "script": `(function(){ const el = document.querySelector('#goal-run-goal'); if (!el) throw new Error('missing #goal-run-goal'); el.value = 'Verify browser-tool Goal Runs flow'; el.dispatchEvent(new Event('input', { bubbles: true })); return true; })();`},
-		{"action": "execute_script", "script": `(function(){ const el = document.querySelector('#goal-run-criteria'); if (!el) throw new Error('missing #goal-run-criteria'); el.value = 'confirm manually'; el.dispatchEvent(new Event('input', { bubbles: true })); return true; })();`},
+		{"action": "execute_script", "script": `(async function waitForDialog(deadline){ while (Date.now() < deadline) { if (document.querySelector('[role=\"dialog\"]')) { return true; } await new Promise(resolve => setTimeout(resolve, 50)); } throw new Error('goal run dialog did not render in time'); })(Date.now() + 4000);`},
+		{"action": "execute_script", "script": `(async function(){ const waitFor = async (selector) => { const deadline = Date.now() + 4000; while (Date.now() < deadline) { const el = document.querySelector(selector); if (el) return el; await new Promise(resolve => setTimeout(resolve, 50)); } throw new Error('missing ' + selector); }; const set = async (selector, value) => { const el = await waitFor(selector); el.value = value; el.dispatchEvent(new Event('input', { bubbles: true })); }; await set('#goal-run-name', 'Browser tool e2e'); await set('#goal-run-goal', 'Verify browser-tool Goal Runs flow'); await set('#goal-run-criteria', 'confirm manually'); return true; })();`},
 		{"action": "execute_script", "script": `Array.from(document.querySelectorAll('button')).find(btn => btn.textContent?.includes('Create run'))?.click(); true;`},
 		{"action": "wait", "duration": float64(1200)},
 		{"action": "execute_script", "script": `Array.from(document.querySelectorAll('button')).find(btn => btn.textContent?.includes('Confirm criteria'))?.click(); true;`},