diff --git a/cli/azd/extensions/azure.ai.agents/internal/exterrors/codes.go b/cli/azd/extensions/azure.ai.agents/internal/exterrors/codes.go index 87b7e0c1219..a0a93daf470 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/exterrors/codes.go +++ b/cli/azd/extensions/azure.ai.agents/internal/exterrors/codes.go @@ -114,6 +114,11 @@ const ( CodeInvalidFilePath = "invalid_file_path" ) +// Error codes for $ref file-include resolution. +const ( + CodeInvalidFileRef = "invalid_file_ref" +) + // Error codes for toolbox operations. const ( CodeInvalidToolbox = "invalid_toolbox" diff --git a/cli/azd/extensions/azure.ai.agents/internal/project/includes.go b/cli/azd/extensions/azure.ai.agents/internal/project/includes.go new file mode 100644 index 00000000000..6c67ff0c0f1 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/project/includes.go @@ -0,0 +1,285 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package project + +import ( + "fmt" + "os" + "path/filepath" + "regexp" + "slices" + "strings" + + "go.yaml.in/yaml/v3" + + "azureaiagent/internal/exterrors" +) + +// refKey is the include directive key. Any object that contains it is replaced by the loaded +// file, with the object's remaining keys overlaid on top (a shallow, top-level merge). +const refKey = "$ref" + +// maxRefDepth bounds nested $ref resolution. Cyclic includes are already rejected by the +// per-chain cycle check; this guards against pathologically deep (but acyclic) chains as a +// defense-in-depth measure. +const maxRefDepth = 32 + +// remoteRefPattern matches a URL scheme prefix (e.g. https://, file://). Remote $ref targets +// are rejected for now; only local YAML/JSON files are supported. +var remoteRefPattern = regexp.MustCompile(`(?i)^[a-z][a-z0-9+.-]*://`) + +// ResolveFileRefs resolves $ref file includes within a parsed Foundry service configuration. +// +// cfg is the Foundry service entry as already-parsed data (the inline azure.yaml keys that +// reach the extension over gRPC, where they arrive as a structpb.Struct decoded to +// map[string]any). projectRoot is the directory that holds azure.yaml; relative $ref targets +// at the top level resolve against it, and rebased project/instructions paths are anchored to +// it. +// +// Any object that contains a "$ref" string is replaced by the referenced YAML or JSON file, +// with the object's remaining keys overlaid on top. The overlay is a shallow, top-level merge: +// sibling scalars, arrays, and objects each replace the loaded value wholesale. Nested $ref +// directives inside a loaded file resolve relative to that file's own directory, and that +// file's relative project, instructions, and $ref paths are rebased so the returned config has +// every path anchored to projectRoot in clean, forward-slash form. Inline path values authored +// directly in azure.yaml are left exactly as written. +// +// $ref values that are URLs are rejected (remote includes are not supported yet); only local +// relative or absolute file paths are accepted. Cyclic and excessively deep include chains +// return an error rather than looping. $ref targets are treated as trusted input, the same +// trust level as azure.yaml itself. +func ResolveFileRefs(cfg map[string]any, projectRoot string) (map[string]any, error) { + if cfg == nil { + return nil, nil + } + + root := filepath.Clean(projectRoot) + resolved, err := resolveValue(cfg, root, root, nil) + if err != nil { + return nil, err + } + + out, ok := resolved.(map[string]any) + if !ok { + // resolveValue always returns a map for a map input; this is unreachable in practice. + return nil, exterrors.Internal(exterrors.CodeInvalidFileRef, "resolved Foundry config is not a mapping") + } + return out, nil +} + +// resolveValue recursively resolves $ref includes in v. baseDir is the directory of the file +// that holds v (so relative $ref and path values resolve correctly); projectRoot is the +// azure.yaml directory used to re-anchor rebased paths; chain holds the absolute paths of the +// $ref files currently being resolved, for cycle detection. +func resolveValue(v any, baseDir, projectRoot string, chain []string) (any, error) { + switch typed := v.(type) { + case map[string]any: + if _, isRef := typed[refKey]; isRef { + return resolveRef(typed, baseDir, projectRoot, chain) + } + + out := make(map[string]any, len(typed)) + for key, child := range typed { + resolvedChild, err := resolveMapEntry(key, child, baseDir, projectRoot, chain) + if err != nil { + return nil, err + } + out[key] = resolvedChild + } + return out, nil + case []any: + out := make([]any, len(typed)) + for i, item := range typed { + resolvedItem, err := resolveValue(item, baseDir, projectRoot, chain) + if err != nil { + return nil, err + } + out[i] = resolvedItem + } + return out, nil + default: + return v, nil + } +} + +// resolveMapEntry resolves a single key/value of a mapping and rebases it when the key is a +// path-bearing field (project, instructions) loaded from a $ref file. Inline values authored +// directly in azure.yaml (baseDir == projectRoot) are left untouched. +func resolveMapEntry(key string, child any, baseDir, projectRoot string, chain []string) (any, error) { + resolvedChild, err := resolveValue(child, baseDir, projectRoot, chain) + if err != nil { + return nil, err + } + + if value, ok := resolvedChild.(string); ok && baseDir != projectRoot && isPathKey(key, value) { + return rebasePath(value, baseDir, projectRoot), nil + } + return resolvedChild, nil +} + +// resolveRef loads the file named by directive[refKey], resolves it relative to its own +// directory, then overlays the directive's sibling keys on top of the loaded object. +func resolveRef(directive map[string]any, baseDir, projectRoot string, chain []string) (any, error) { + ref, ok := directive[refKey].(string) + if !ok { + return nil, exterrors.Validation( + exterrors.CodeInvalidFileRef, + fmt.Sprintf("%s value must be a string, got %T", refKey, directive[refKey]), + fmt.Sprintf("Set %s to a relative or absolute path to a YAML or JSON file.", refKey), + ) + } + + target, err := refTargetPath(ref, baseDir) + if err != nil { + return nil, err + } + + if slices.Contains(chain, target) { + return nil, exterrors.Validation( + exterrors.CodeInvalidFileRef, + fmt.Sprintf("cyclic %s include detected at %q", refKey, target), + "Remove the circular reference between the included files.", + ) + } + if len(chain) >= maxRefDepth { + return nil, exterrors.Validation( + exterrors.CodeInvalidFileRef, + fmt.Sprintf("%s include nesting exceeds %d levels at %q", refKey, maxRefDepth, target), + "Reduce the depth of nested $ref includes.", + ) + } + + loaded, err := loadRefFile(target) + if err != nil { + return nil, err + } + + nextChain := append(slices.Clone(chain), target) + resolvedLoaded, err := resolveValue(loaded, filepath.Dir(target), projectRoot, nextChain) + if err != nil { + return nil, err + } + + out, ok := resolvedLoaded.(map[string]any) + if !ok { + // loadRefFile guarantees a mapping, so resolveValue returns a map; unreachable in practice. + return nil, exterrors.Validation( + exterrors.CodeInvalidFileRef, + fmt.Sprintf("%s file %q must contain a YAML or JSON object", refKey, target), + "The referenced file must define an object, not a list or scalar.", + ) + } + + // Overlay sibling keys. They are authored in the file that holds the directive, so they + // resolve and rebase against baseDir, not the loaded file's directory. + for key, child := range directive { + if key == refKey { + continue + } + resolvedChild, err := resolveMapEntry(key, child, baseDir, projectRoot, chain) + if err != nil { + return nil, err + } + out[key] = resolvedChild + } + + return out, nil +} + +// refTargetPath turns a $ref value into an absolute, cleaned local file path. Relative paths +// resolve against baseDir. URLs are rejected. +func refTargetPath(ref, baseDir string) (string, error) { + ref = strings.TrimSpace(ref) + if ref == "" { + return "", exterrors.Validation( + exterrors.CodeInvalidFileRef, + fmt.Sprintf("%s value must not be empty", refKey), + "Set $ref to a relative or absolute path to a YAML or JSON file.", + ) + } + if remoteRefPattern.MatchString(ref) { + return "", exterrors.Validation( + exterrors.CodeInvalidFileRef, + fmt.Sprintf("%s %q is a URL; remote includes are not supported yet", refKey, ref), + "Use a local file path. Download the file and reference it by a relative or absolute path.", + ) + } + if filepath.IsAbs(ref) { + return filepath.Clean(ref), nil + } + return filepath.Join(baseDir, ref), nil +} + +// loadRefFile reads and parses a referenced YAML or JSON file into a mapping. JSON parses as a +// subset of YAML, so a single decoder handles both. +func loadRefFile(path string) (map[string]any, error) { + // #nosec G304 -- $ref targets are trusted config input, the same trust level as azure.yaml + // itself (design spec ยง2.4 treats includes as trusted input). + data, err := os.ReadFile(path) + if err != nil { + return nil, exterrors.Validation( + exterrors.CodeInvalidFileRef, + fmt.Sprintf("cannot read %s file %q: %v", refKey, path, err), + "Check that the path is correct and the file exists and is readable.", + ) + } + + var out map[string]any + if err := yaml.Unmarshal(data, &out); err != nil { + return nil, exterrors.Validation( + exterrors.CodeInvalidFileRef, + fmt.Sprintf("%s file %q is not a valid YAML or JSON object: %v", refKey, path, err), + "Fix the file so it parses as a YAML or JSON object.", + ) + } + if out == nil { + return nil, exterrors.Validation( + exterrors.CodeInvalidFileRef, + fmt.Sprintf("%s file %q is empty or not a mapping", refKey, path), + "The referenced file must contain a YAML or JSON object.", + ) + } + return out, nil +} + +// isPathKey reports whether a string value for key is a filesystem path that should be rebased. +// project is always a path. instructions is a path only when it looks like one (a single-line +// .md or .txt reference); otherwise it is inline prose and must be left untouched. +func isPathKey(key, value string) bool { + switch key { + case "project": + return value != "" + case "instructions": + return looksLikeInstructionsPath(value) + default: + return false + } +} + +// looksLikeInstructionsPath reports whether an instructions value is a file path rather than +// inline prose: a single line ending in .md or .txt. +func looksLikeInstructionsPath(value string) bool { + if value == "" || strings.ContainsAny(value, "\r\n") { + return false + } + lower := strings.ToLower(strings.TrimSpace(value)) + return strings.HasSuffix(lower, ".md") || strings.HasSuffix(lower, ".txt") +} + +// rebasePath re-anchors a relative path that was authored relative to baseDir so it becomes +// relative to projectRoot, in clean forward-slash form. Empty values, URLs, and absolute paths +// are returned unchanged. +func rebasePath(value, baseDir, projectRoot string) string { + if value == "" || remoteRefPattern.MatchString(value) || filepath.IsAbs(value) { + return value + } + + abs := filepath.Join(baseDir, value) + rel, err := filepath.Rel(projectRoot, abs) + if err != nil { + // Different Windows volumes (or otherwise unrelated paths): fall back to the absolute path. + return filepath.ToSlash(abs) + } + return filepath.ToSlash(rel) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/project/includes_test.go b/cli/azd/extensions/azure.ai.agents/internal/project/includes_test.go new file mode 100644 index 00000000000..271b168c439 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/internal/project/includes_test.go @@ -0,0 +1,388 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package project + +import ( + "os" + "path/filepath" + "testing" + + "github.com/azure/azure-dev/cli/azd/pkg/azdext" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.yaml.in/yaml/v3" + + "azureaiagent/internal/exterrors" +) + +// writeFile writes content to dir/name (creating parent directories) and returns nothing; it +// is used to build $ref fixture trees under a temp project root. +func writeFile(t *testing.T, dir, name, content string) { + t.Helper() + path := filepath.Join(dir, name) + require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o700)) + require.NoError(t, os.WriteFile(path, []byte(content), 0o600)) +} + +// parseYAML decodes a YAML document into a map, matching how the Foundry config reaches the +// extension (already-parsed data). +func parseYAML(t *testing.T, content string) map[string]any { + t.Helper() + var out map[string]any + require.NoError(t, yaml.Unmarshal([]byte(content), &out)) + return out +} + +// requireFileRefError asserts err is an invalid_file_ref validation error whose message +// contains substr. +func requireFileRefError(t *testing.T, err error, substr string) { + t.Helper() + require.Error(t, err) + var localErr *azdext.LocalError + require.ErrorAs(t, err, &localErr) + assert.Equal(t, exterrors.CodeInvalidFileRef, localErr.Code) + assert.Contains(t, localErr.Message, substr) +} + +func TestResolveFileRefs_NoRefsIsUnchanged(t *testing.T) { + root := t.TempDir() + cfg := parseYAML(t, ` +endpoint: https://my-account.services.ai.azure.com/api/projects/my-project +agents: + - name: a + kind: hosted + project: src/a + - name: b + kind: prompt + instructions: prompts/b.md +`) + want := parseYAML(t, ` +endpoint: https://my-account.services.ai.azure.com/api/projects/my-project +agents: + - name: a + kind: hosted + project: src/a + - name: b + kind: prompt + instructions: prompts/b.md +`) + + got, err := ResolveFileRefs(cfg, root) + require.NoError(t, err) + // Inline path values authored directly in azure.yaml are left exactly as written. + assert.Equal(t, want, got) +} + +func TestResolveFileRefs_NilConfig(t *testing.T) { + got, err := ResolveFileRefs(nil, t.TempDir()) + require.NoError(t, err) + assert.Nil(t, got) +} + +func TestResolveFileRefs_RelativeIncludeNoSiblings(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "agents/support-agent.yaml", ` +name: support-agent +kind: hosted +project: ../src/support-agent +docker: + path: Dockerfile +`) + cfg := parseYAML(t, ` +agents: + - $ref: ./agents/support-agent.yaml +`) + + got, err := ResolveFileRefs(cfg, root) + require.NoError(t, err) + + // project rebased from the agents/ directory to the project root; docker.path is not a + // path-bearing key and is left untouched. + want := parseYAML(t, ` +agents: + - name: support-agent + kind: hosted + project: src/support-agent + docker: + path: Dockerfile +`) + assert.Equal(t, want, got) +} + +func TestResolveFileRefs_RefWithSurroundingWhitespaceResolves(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "agents/a.yaml", ` +name: a +kind: hosted +project: ../src/a +`) + cfg := parseYAML(t, ` +agents: + - $ref: " ./agents/a.yaml " +`) + + got, err := ResolveFileRefs(cfg, root) + require.NoError(t, err) + + // Surrounding whitespace in the $ref value is trimmed before the path is resolved. + want := parseYAML(t, ` +agents: + - name: a + kind: hosted + project: src/a +`) + assert.Equal(t, want, got) +} + +func TestResolveFileRefs_SiblingOverlayOverrides(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "agents/base.yaml", ` +name: base +kind: hosted +project: ../src/base +protocols: + - protocol: responses + version: "1.0.0" +env: + A: "1" + B: "2" +`) + cfg := parseYAML(t, ` +agents: + - $ref: ./agents/base.yaml + name: override + env: + C: "3" + protocols: + - protocol: a2a + version: "2.0.0" + extra: true +`) + + got, err := ResolveFileRefs(cfg, root) + require.NoError(t, err) + + // Scalar (name) replaced, map (env) and array (protocols) replaced wholesale (shallow), + // new key (extra) added, and the un-overridden project is still rebased. + want := parseYAML(t, ` +agents: + - name: override + kind: hosted + project: src/base + protocols: + - protocol: a2a + version: "2.0.0" + env: + C: "3" + extra: true +`) + assert.Equal(t, want, got) +} + +func TestResolveFileRefs_NestedIncludeRebasesPerFile(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "agents/a.yaml", ` +name: a +kind: hosted +project: ../src/a +skill: + $ref: ../skills/s.yaml +`) + writeFile(t, root, "skills/s.yaml", ` +name: s +instructions: ../prompts/s.md +`) + cfg := parseYAML(t, ` +agents: + - $ref: ./agents/a.yaml +`) + + got, err := ResolveFileRefs(cfg, root) + require.NoError(t, err) + + // a.yaml's project rebases from agents/; the nested skill from skills/ rebases its + // instructions path relative to skills/, both anchored back to the project root. + want := parseYAML(t, ` +agents: + - name: a + kind: hosted + project: src/a + skill: + name: s + instructions: prompts/s.md +`) + assert.Equal(t, want, got) +} + +func TestResolveFileRefs_InstructionsInlineVsPath(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "skills/path.yaml", ` +name: path-skill +instructions: ../prompts/code-review.md +`) + writeFile(t, root, "skills/inline.yaml", ` +name: inline-skill +instructions: | + Review the code for style issues. + Keep it terse. +`) + cfg := parseYAML(t, ` +skills: + - $ref: ./skills/path.yaml + - $ref: ./skills/inline.yaml +`) + + got, err := ResolveFileRefs(cfg, root) + require.NoError(t, err) + + // A single-line .md value is treated as a path and rebased; multi-line inline prose is + // left untouched. + want := parseYAML(t, ` +skills: + - name: path-skill + instructions: prompts/code-review.md + - name: inline-skill + instructions: | + Review the code for style issues. + Keep it terse. +`) + assert.Equal(t, want, got) +} + +func TestResolveFileRefs_AbsolutePathInclude(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "agents/abs.yaml", ` +name: abs +kind: prompt +instructions: hi there +`) + absRef := filepath.ToSlash(filepath.Join(root, "agents", "abs.yaml")) + cfg := map[string]any{ + "agents": []any{ + map[string]any{refKey: absRef}, + }, + } + + got, err := ResolveFileRefs(cfg, root) + require.NoError(t, err) + + want := parseYAML(t, ` +agents: + - name: abs + kind: prompt + instructions: hi there +`) + assert.Equal(t, want, got) +} + +func TestResolveFileRefs_JSONInclude(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "agents/j.json", `{"name":"j","kind":"prompt","instructions":"hello"}`) + cfg := parseYAML(t, ` +agents: + - $ref: ./agents/j.json +`) + + got, err := ResolveFileRefs(cfg, root) + require.NoError(t, err) + + want := parseYAML(t, ` +agents: + - name: j + kind: prompt + instructions: hello +`) + assert.Equal(t, want, got) +} + +func TestResolveFileRefs_MissingFile(t *testing.T) { + root := t.TempDir() + cfg := parseYAML(t, ` +agents: + - $ref: ./agents/does-not-exist.yaml +`) + + _, err := ResolveFileRefs(cfg, root) + requireFileRefError(t, err, "cannot read") + assert.Contains(t, err.Error(), "does-not-exist.yaml") +} + +func TestResolveFileRefs_MalformedFile(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "agents/bad.yaml", "name: support\n bad: : indentation: [") + cfg := parseYAML(t, ` +agents: + - $ref: ./agents/bad.yaml +`) + + _, err := ResolveFileRefs(cfg, root) + requireFileRefError(t, err, "not a valid YAML or JSON object") +} + +func TestResolveFileRefs_TopLevelSequenceFile(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "agents/list.yaml", "- one\n- two\n") + cfg := parseYAML(t, ` +agents: + - $ref: ./agents/list.yaml +`) + + _, err := ResolveFileRefs(cfg, root) + // A top-level sequence cannot decode into a mapping. + requireFileRefError(t, err, "list.yaml") +} + +func TestResolveFileRefs_CycleDetected(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "a.yaml", ` +name: a +next: + $ref: ./b.yaml +`) + writeFile(t, root, "b.yaml", ` +name: b +next: + $ref: ./a.yaml +`) + cfg := parseYAML(t, ` +agents: + - $ref: ./a.yaml +`) + + _, err := ResolveFileRefs(cfg, root) + requireFileRefError(t, err, "cyclic") +} + +func TestResolveFileRefs_URLRejected(t *testing.T) { + root := t.TempDir() + cfg := parseYAML(t, ` +agents: + - $ref: https://example.com/agents/a.yaml +`) + + _, err := ResolveFileRefs(cfg, root) + requireFileRefError(t, err, "remote includes are not supported") +} + +func TestResolveFileRefs_NonStringRef(t *testing.T) { + root := t.TempDir() + cfg := parseYAML(t, ` +agents: + - $ref: 123 +`) + + _, err := ResolveFileRefs(cfg, root) + requireFileRefError(t, err, "must be a string") +} + +func TestResolveFileRefs_EmptyRef(t *testing.T) { + root := t.TempDir() + cfg := parseYAML(t, ` +agents: + - $ref: "" +`) + + _, err := ResolveFileRefs(cfg, root) + requireFileRefError(t, err, "must not be empty") +}