Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ protected void CleanPublishedOutput()

protected string GetDotNetExeForArchitecture()
{
var executableName = DotnetCommandName;
// We expect x64 dotnet.exe to be on the path but we have to go searching for the x86 version.
var executableName = GetHostDotNetExecutable();
// We expect the x64 dotnet host to be resolvable, but we have to go searching for the x86 version.
if (DotNetCommands.IsRunningX86OnX64(DeploymentParameters.RuntimeArchitecture))
{
executableName = DotNetCommands.GetDotNetExecutable(DeploymentParameters.RuntimeArchitecture);
Expand All @@ -116,15 +116,36 @@ protected string GetDotNetExeForArchitecture()
return executableName;
}

// The runtime libraries Helix harness runs tests against the testhost via $RUNTIME_PATH/dotnet by
// absolute path and doesn't add dotnet to PATH (that's only done for workload tests), so the bare
// command name can fail to launch on machines without a global dotnet. Resolve the muxer next to the
// running shared framework instead, falling back to PATH for local runs.
private static string GetHostDotNetExecutable()
{
    // Start from the directory holding the assembly that defines System.Object.
    // Location can be empty, in which case there is nothing to probe and we
    // fall back to the bare command name.
    string frameworkDirectory = Path.GetDirectoryName(typeof(object).Assembly.Location);
    if (string.IsNullOrEmpty(frameworkDirectory))
    {
        return DotnetCommandName;
    }

    // frameworkDirectory is expected to be <testhost>/shared/Microsoft.NETCore.App/<version>,
    // so three levels up is <testhost>, which is where the muxer lives.
    string installRoot = Path.GetFullPath(Path.Combine(frameworkDirectory, "..", "..", ".."));
    string muxerFileName = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? "dotnet.exe" : "dotnet";
    string candidate = Path.Combine(installRoot, muxerFileName);

    // Only use the resolved path when the file is really there; otherwise defer to PATH lookup.
    return File.Exists(candidate) ? candidate : DotnetCommandName;
}

protected void ShutDownIfAnyHostProcess(Process hostProcess)
{
if (hostProcess != null && !hostProcess.HasExited)
if (hostProcess is not null && IsRunning(hostProcess))
{
Logger.LogInformation("Attempting to cancel process {0}", hostProcess.Id);

// Shutdown the host process.
hostProcess.KillTree();
if (!hostProcess.HasExited)
if (IsRunning(hostProcess))
{
Logger.LogWarning("Unable to terminate the host process with process Id '{processId}", hostProcess.Id);
}
Comment on lines +148 to 151
Expand All @@ -139,6 +160,21 @@ protected void ShutDownIfAnyHostProcess(Process hostProcess)
}
}

// Process.HasExited throws InvalidOperationException ("No process is associated with this object")
// when the process was never started. Treat that as "not running" so disposal after a failed
// deployment doesn't mask the original start failure with a misleading exception.
private static bool IsRunning(Process hostProcess)
{
    bool exited;
    try
    {
        exited = hostProcess.HasExited;
    }
    catch (InvalidOperationException)
    {
        // HasExited throws when no process is associated with this object
        // (for example, it was never started); report that as "not running".
        exited = true;
    }

    return !exited;
}

protected void AddEnvironmentVariablesToProcess(ProcessStartInfo startInfo, IDictionary<string, string> environmentVariables)
{
var environment = startInfo.Environment;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,44 @@ protected async Task<CancellationToken> StartSelfHostAsync()
AddEnvironmentVariablesToProcess(startInfo, DeploymentParameters.EnvironmentVariables);

var started = new TaskCompletionSource<object>();
var hostExitTokenSource = new CancellationTokenSource();

Comment on lines 124 to +126
await StartHostWithRetryAsync(startInfo, executableName, started, hostExitTokenSource);

if (HostProcess.HasExited)
{
Logger.LogError("Host process {processName} {pid} exited with code {exitCode} or failed to start.", startInfo.FileName, HostProcess.Id, HostProcess.ExitCode);
throw new Exception("Failed to start host");
}

Logger.LogInformation("Started {fileName}. Process Id : {processId}", startInfo.FileName, HostProcess.Id);

HostProcess = new Process() { StartInfo = startInfo };
HostProcess.EnableRaisingEvents = true;
HostProcess.OutputDataReceived += (sender, dataArgs) =>
// Host may not write startup messages, in which case assume it started
if (DeploymentParameters.StatusMessagesEnabled)
{
// The timeout here is large, because we don't know how long the test could need
// We cover a lot of error cases above, but I want to make sure we eventually give up and don't hang the build
// just in case we missed one -anurse
await started.Task.WaitAsync(TimeSpan.FromMinutes(10));
}

return hostExitTokenSource.Token;
}
}

// Launching the host process can fail transiently on constrained CI/Helix machines (for example a

@jkotas jkotas Jun 25, 2026

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this really the case?

RemoteExecutor launches a ton of processes on CI/Helix machines. It does not have a retry loop like this, and we do not see any problems with that.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, I'm trying to find out. The PR was generated by Copilot, so please treat the comments as WIP.

// failed fork or a momentarily unavailable executable). Retry a few times before giving up so a
// one-off launch failure doesn't fail the test; the final failure is rethrown with its real cause.
private async Task StartHostWithRetryAsync(ProcessStartInfo startInfo, string executableName, TaskCompletionSource<object> started, CancellationTokenSource hostExitTokenSource)
{
const int MaxAttempts = 3;
TimeSpan retryDelay = TimeSpan.FromSeconds(2);

for (int attempt = 1; ; attempt++)
{
var process = new Process() { StartInfo = startInfo };
process.EnableRaisingEvents = true;
process.OutputDataReceived += (sender, dataArgs) =>
{
if (string.Equals(dataArgs.Data, ApplicationStartedMessage))
{
Expand All @@ -134,44 +168,35 @@ protected async Task<CancellationToken> StartSelfHostAsync()

OutputReceived?.Invoke(sender, dataArgs);
};
var hostExitTokenSource = new CancellationTokenSource();
HostProcess.Exited += (sender, e) =>
process.Exited += (sender, e) =>
{
Logger.LogInformation("host process ID {pid} shut down", HostProcess.Id);
Logger.LogInformation("host process ID {pid} shut down", process.Id);

// If TrySetResult was called above, this will just silently fail to set the new state, which is what we want
started.TrySetException(new Exception($"Command exited unexpectedly with exit code: {HostProcess.ExitCode}"));
started.TrySetException(new Exception($"Command exited unexpectedly with exit code: {process.ExitCode}"));

TriggerHostShutdown(hostExitTokenSource);
};

HostProcess = process;

try
{
HostProcess.StartAndCaptureOutAndErrToLogger(executableName, Logger);
}
catch (Exception ex)
{
Logger.LogError("Error occurred while starting the process. Exception: {exception}", ex.ToString());
process.StartAndCaptureOutAndErrToLogger(executableName, Logger);
return;
}

if (HostProcess.HasExited)
catch (Exception ex) when (attempt < MaxAttempts)
{
Logger.LogError("Host process {processName} {pid} exited with code {exitCode} or failed to start.", startInfo.FileName, HostProcess.Id, HostProcess.ExitCode);
throw new Exception("Failed to start host");
Logger.LogWarning("Attempt {attempt} of {maxAttempts} to start the host process failed; retrying in {delaySeconds}s. Exception: {exception}",
attempt, MaxAttempts, retryDelay.TotalSeconds, ex.ToString());
process.Dispose();
await Task.Delay(retryDelay);
}

Logger.LogInformation("Started {fileName}. Process Id : {processId}", startInfo.FileName, HostProcess.Id);

// Host may not write startup messages, in which case assume it started
if (DeploymentParameters.StatusMessagesEnabled)
catch (Exception ex)
{
// The timeout here is large, because we don't know how long the test could need
// We cover a lot of error cases above, but I want to make sure we eventually give up and don't hang the build
// just in case we missed one -anurse
await started.Task.WaitAsync(TimeSpan.FromMinutes(10));
Logger.LogError("Failed to start the host process after {maxAttempts} attempts. Exception: {exception}", MaxAttempts, ex.ToString());
throw;
}

return hostExitTokenSource.Token;
}
}

Expand Down
Loading