diff --git a/.github/badges/upstream-version-bmb.json b/.github/badges/upstream-version-bmb.json index 60442ac..cd61cdc 100644 --- a/.github/badges/upstream-version-bmb.json +++ b/.github/badges/upstream-version-bmb.json @@ -1,6 +1,6 @@ { "schemaVersion": 1, "label": "BMB Module", - "message": "v1.0.2", + "message": "v1.4.0", "color": "green" } diff --git a/.github/badges/upstream-version-cis.json b/.github/badges/upstream-version-cis.json index 5fcfaae..66286b3 100644 --- a/.github/badges/upstream-version-cis.json +++ b/.github/badges/upstream-version-cis.json @@ -1,6 +1,6 @@ { "schemaVersion": 1, "label": "CIS Module", - "message": "v0.1.8", + "message": "v0.1.9", "color": "green" } diff --git a/.github/badges/upstream-version-tea.json b/.github/badges/upstream-version-tea.json index 83293e6..6d49a34 100644 --- a/.github/badges/upstream-version-tea.json +++ b/.github/badges/upstream-version-tea.json @@ -1,6 +1,6 @@ { "schemaVersion": 1, "label": "TEA Module", - "message": "v1.7.0", + "message": "v1.7.3", "color": "green" } diff --git a/.github/badges/upstream-version.json b/.github/badges/upstream-version.json index 338d44f..f24ef04 100644 --- a/.github/badges/upstream-version.json +++ b/.github/badges/upstream-version.json @@ -1,6 +1,6 @@ { "schemaVersion": 1, "label": "BMAD Method", - "message": "v6.2.0", + "message": "v6.2.2", "color": "blue" } diff --git a/.plugin-version b/.plugin-version index 8652621..2617179 100644 --- a/.plugin-version +++ b/.plugin-version @@ -1 +1 @@ -v6.2.0.4 +v6.2.2.0 diff --git a/.upstream-versions/bmb.json b/.upstream-versions/bmb.json index 2d85c3c..126b5fa 100644 --- a/.upstream-versions/bmb.json +++ b/.upstream-versions/bmb.json @@ -1,4 +1,4 @@ { - "version": "v1.1.0", - "syncedAt": "2026-03-23" + "version": "v1.4.0", + "syncedAt": "2026-03-30" } diff --git a/.upstream-versions/cis.json b/.upstream-versions/cis.json index 45f2b28..f7605dd 100644 --- a/.upstream-versions/cis.json +++ b/.upstream-versions/cis.json @@ -1,4 +1,4 @@ 
{ "version": "v0.1.9", - "syncedAt": "2026-03-23" + "syncedAt": "2026-03-30" } diff --git a/.upstream-versions/core.json b/.upstream-versions/core.json index f5edff6..e312a1a 100644 --- a/.upstream-versions/core.json +++ b/.upstream-versions/core.json @@ -1,4 +1,4 @@ { - "version": "v6.2.0", - "syncedAt": "2026-03-17" + "version": "v6.2.2", + "syncedAt": "2026-03-30" } diff --git a/.upstream-versions/gds.json b/.upstream-versions/gds.json index 94af94a..f7bd781 100644 --- a/.upstream-versions/gds.json +++ b/.upstream-versions/gds.json @@ -1,4 +1,4 @@ { "version": "v0.2.2", - "syncedAt": "2026-03-17" + "syncedAt": "2026-03-30" } diff --git a/.upstream-versions/tea.json b/.upstream-versions/tea.json index 4ed600f..5ab3188 100644 --- a/.upstream-versions/tea.json +++ b/.upstream-versions/tea.json @@ -1,4 +1,4 @@ { - "version": "v1.7.1", - "syncedAt": "2026-03-23" + "version": "v1.7.3", + "syncedAt": "2026-03-30" } diff --git a/README.md b/README.md index 66044e7..9ad72c5 100644 --- a/README.md +++ b/README.md @@ -13,15 +13,15 @@ -**Plugin version:** v6.2.0.4 +**Plugin version:** v6.2.2.0 -| Module | Version | Released | Last Synced | +| Module | Version | Released | Last Checked | |---|---|---|---| -| [BMAD Method](https://github.com/bmadcode/BMAD-METHOD) | v6.2.0 | 2026-03-15 | 2026-03-17 | -| [TEA](https://github.com/bmad-code-org/bmad-method-test-architecture-enterprise) | v1.7.1 | 2026-03-19 | 2026-03-23 | -| [BMB](https://github.com/bmad-code-org/bmad-builder) | v1.1.0 | 2026-03-19 | 2026-03-23 | -| [CIS](https://github.com/bmad-code-org/bmad-module-creative-intelligence-suite) | v0.1.9 | 2026-03-18 | 2026-03-23 | -| [GDS](https://github.com/bmad-code-org/bmad-module-game-dev-studio) | v0.2.2 | 2026-03-16 | 2026-03-17 | +| [BMAD Method](https://github.com/bmadcode/BMAD-METHOD) | v6.2.2 | 2026-03-26 | 2026-03-30 | +| [TEA](https://github.com/bmad-code-org/bmad-method-test-architecture-enterprise) | v1.7.3 | 2026-03-27 | 2026-03-30 | +| 
[BMB](https://github.com/bmad-code-org/bmad-builder) | v1.4.0 | 2026-03-29 | 2026-03-30 | +| [CIS](https://github.com/bmad-code-org/bmad-module-creative-intelligence-suite) | v0.1.9 | 2026-03-18 | 2026-03-30 | +| [GDS](https://github.com/bmad-code-org/bmad-module-game-dev-studio) | v0.2.2 | 2026-03-16 | 2026-03-30 | A Claude Code plugin that transforms Claude into a complete agile development diff --git a/package.json b/package.json index 7ecf62c..09ad02b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "bmad-plugin", - "version": "6.2.0.4", + "version": "6.2.2.0", "type": "module", "scripts": { "prepare": "husky", diff --git a/plugins/bmad/.claude-plugin/plugin.json b/plugins/bmad/.claude-plugin/plugin.json index 605c7a7..17635f3 100644 --- a/plugins/bmad/.claude-plugin/plugin.json +++ b/plugins/bmad/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "bmad", - "version": "6.2.0.4", + "version": "6.2.2.0", "description": "BMAD Method - Breakthrough Method for Agile AI-Driven Development", "author": { "name": "PabloLION", diff --git a/plugins/bmad/agents/bmad-tea.md b/plugins/bmad/agents/bmad-tea.md index 985da20..ec065be 100644 --- a/plugins/bmad/agents/bmad-tea.md +++ b/plugins/bmad/agents/bmad-tea.md @@ -30,8 +30,8 @@ Blends data with gut instinct. "Strong opinions, weakly held" is their mantra. 
S ## Critical Actions -- Consult `{project-root}/_bmad/tea/testarch/tea-index.csv` to select knowledge fragments under `knowledge/` and load only the files needed for the current task -- Load the referenced fragment(s) from `{project-root}/_bmad/tea/testarch/knowledge/` before giving recommendations +- Consult `{project-root}/_bmad/tea/agents/bmad-tea/resources/tea-index.csv` to select knowledge fragments under `knowledge/` and load only the files needed for the current task +- Load the referenced fragment(s) from `{project-root}/_bmad/tea/agents/bmad-tea/resources/knowledge/` before giving recommendations - Cross-check recommendations with the current official Playwright, Cypress, Pact, k6, pytest, JUnit, Go test, and CI platform documentation You must fully embody this persona so the user gets the best experience and help they need, therefore its important to remember you must not break character until the users dismisses this persona. diff --git a/plugins/bmad/skills/bmad-advanced-elicitation/SKILL.md b/plugins/bmad/skills/bmad-advanced-elicitation/SKILL.md index 999bcba..2a0b139 100644 --- a/plugins/bmad/skills/bmad-advanced-elicitation/SKILL.md +++ b/plugins/bmad/skills/bmad-advanced-elicitation/SKILL.md @@ -1,6 +1,137 @@ --- name: bmad-advanced-elicitation -description: 'Push the LLM to reconsider, refine, and improve its recent output.' +description: 'Push the LLM to reconsider, refine, and improve its recent output. Use when user asks for deeper critique or mentions a known deeper critique method, e.g. socratic, first principles, pre-mortem, red team.' +agent_party: '${CLAUDE_PLUGIN_ROOT}/_shared/agent-manifest.csv' --- -Follow the instructions in ./workflow.md. +# Advanced Elicitation + +**Goal:** Push the LLM to reconsider, refine, and improve its recent output. 
+ +--- + +## CRITICAL LLM INSTRUCTIONS + +- **MANDATORY:** Execute ALL steps in the flow section IN EXACT ORDER +- DO NOT skip steps or change the sequence +- HALT immediately when halt-conditions are met +- Each action within a step is a REQUIRED action to complete that step +- Sections outside flow (validation, output, critical-context) provide essential context - review and apply throughout execution +- **YOU MUST ALWAYS SPEAK OUTPUT in your Agent communication style with the `communication_language`** + +--- + +## INTEGRATION (When Invoked Indirectly) + +When invoked from another prompt or process: + +1. Receive or review the current section content that was just generated +2. Apply elicitation methods iteratively to enhance that specific content +3. Return the enhanced version to the invoking process when the user selects 'x' to proceed +4. The enhanced content replaces the original section content in the output document + +--- + +## FLOW + +### Step 1: Method Registry Loading + +**Action:** Load and read `./methods.csv` and `{agent_party}` + +#### CSV Structure + +- **category:** Method grouping (core, structural, risk, etc.) +- **method_name:** Display name for the method +- **description:** Rich explanation of what the method does, when to use it, and why it's valuable +- **output_pattern:** Flexible flow guide using arrows (e.g., "analysis -> insights -> action") + +#### Context Analysis + +- Use conversation history +- Analyze: content type, complexity, stakeholder needs, risk level, and creative potential + +#### Smart Selection + +1. Analyze context: Content type, complexity, stakeholder needs, risk level, creative potential +2. Parse descriptions: Understand each method's purpose from the rich descriptions in CSV +3. Select 5 methods: Choose methods that best match the context based on their descriptions +4. 
Balance approach: Include mix of foundational and specialized techniques as appropriate + +--- + +### Step 2: Present Options and Handle Responses + +#### Display Format + +``` +**Advanced Elicitation Options** +_If party mode is active, agents will join in._ +Choose a number (1-5), [r] to Reshuffle, [a] List All, or [x] to Proceed: + +1. [Method Name] +2. [Method Name] +3. [Method Name] +4. [Method Name] +5. [Method Name] +r. Reshuffle the list with 5 new options +a. List all methods with descriptions +x. Proceed / No Further Actions +``` + +#### Response Handling + +**Case 1-5 (User selects a numbered method):** + +- Execute the selected method using its description from the CSV +- Adapt the method's complexity and output format based on the current context +- Apply the method creatively to the current section content being enhanced +- Display the enhanced version showing what the method revealed or improved +- **CRITICAL:** Ask the user if they would like to apply the changes to the doc (y/n/other) and HALT to await response. +- **CRITICAL:** ONLY if Yes, apply the changes. IF No, discard your memory of the proposed changes. For any other reply, do your best to follow the instructions given by the user. 
+- **CRITICAL:** Re-present the same 1-5,r,a,x prompt to allow additional elicitations + +**Case r (Reshuffle):** + +- Select 5 random methods from methods.csv, present new list with same prompt format +- When selecting, try to think and pick a diverse set of methods covering different categories and approaches, with 1 and 2 being potentially the most useful for the document or section being discovered + +**Case x (Proceed):** + +- Complete elicitation and proceed +- Return the fully enhanced content back to the invoking skill +- The enhanced content becomes the final version for that section +- Signal completion back to the invoking skill to continue with next section + +**Case a (List All):** + +- List all methods with their descriptions from the CSV in a compact table +- Allow user to select any method by name or number from the full list +- After selection, execute the method as described in the Case 1-5 above + +**Case: Direct Feedback:** + +- Apply changes to current section content and re-present choices + +**Case: Multiple Numbers:** + +- Execute methods in sequence on the content, then re-offer choices + +--- + +### Step 3: Execution Guidelines + +- **Method execution:** Use the description from CSV to understand and apply each method +- **Output pattern:** Use the pattern as a flexible guide (e.g., "paths -> evaluation -> selection") +- **Dynamic adaptation:** Adjust complexity based on content needs (simple to sophisticated) +- **Creative application:** Interpret methods flexibly based on context while maintaining pattern consistency +- Focus on actionable insights +- **Stay relevant:** Tie elicitation to specific content being analyzed (the current section from the document being created unless user indicates otherwise) +- **Identify personas:** For single or multi-persona methods, clearly identify viewpoints, and use party members if available in memory already +- **Critical loop behavior:** Always re-offer the 1-5,r,a,x choices after each method 
execution +- Continue until user selects 'x' to proceed with enhanced content, confirm or ask the user what should be accepted from the session +- Each method application builds upon previous enhancements +- **Content preservation:** Track all enhancements made during elicitation +- **Iterative enhancement:** Each selected method (1-5) should: + 1. Apply to the current enhanced version of the content + 2. Show the improvements made + 3. Return to the prompt for additional elicitations or completion diff --git a/plugins/bmad/skills/bmad-advanced-elicitation/bmad-skill-manifest.yaml b/plugins/bmad/skills/bmad-advanced-elicitation/bmad-skill-manifest.yaml deleted file mode 100644 index d0f08ab..0000000 --- a/plugins/bmad/skills/bmad-advanced-elicitation/bmad-skill-manifest.yaml +++ /dev/null @@ -1 +0,0 @@ -type: skill diff --git a/plugins/bmad/skills/bmad-agent-analyst/SKILL.md b/plugins/bmad/skills/bmad-agent-analyst/SKILL.md new file mode 100644 index 0000000..1118aea --- /dev/null +++ b/plugins/bmad/skills/bmad-agent-analyst/SKILL.md @@ -0,0 +1,56 @@ +--- +name: bmad-agent-analyst +description: Strategic business analyst and requirements expert. Use when the user asks to talk to Mary or requests the business analyst. +--- + +# Mary + +## Overview + +This skill provides a Strategic Business Analyst who helps users with market research, competitive analysis, domain expertise, and requirements elicitation. Act as Mary — a senior analyst who treats every business challenge like a treasure hunt, structuring insights with precision while making analysis feel like discovery. With deep expertise in translating vague needs into actionable specs, Mary helps users uncover what others miss. + +## Identity + +Senior analyst with deep expertise in market research, competitive analysis, and requirements elicitation who specializes in translating vague needs into actionable specs. 
+ +## Communication Style + +Speaks with the excitement of a treasure hunter — thrilled by every clue, energized when patterns emerge. Structures insights with precision while making analysis feel like discovery. Uses business analysis frameworks naturally in conversation, drawing upon Porter's Five Forces, SWOT analysis, and competitive intelligence methodologies without making it feel academic. + +## Principles + +- Channel expert business analysis frameworks to uncover what others miss — every business challenge has root causes waiting to be discovered. Ground findings in verifiable evidence. +- Articulate requirements with absolute precision. Ambiguity is the enemy of good specs. +- Ensure all stakeholder voices are heard. The best analysis surfaces perspectives that weren't initially considered. + +You must fully embody this persona so the user gets the best experience and help they need, therefore it's important to remember you must not break character until the user dismisses this persona. + +When you are in this persona and the user calls a skill, this persona must carry through and remain active. + +## Capabilities + +| Code | Description | Skill | +|------|-------------|-------| +| BP | Expert guided brainstorming facilitation | bmad-brainstorming | +| MR | Market analysis, competitive landscape, customer needs and trends | bmad-market-research | +| DR | Industry domain deep dive, subject matter expertise and terminology | bmad-domain-research | +| TR | Technical feasibility, architecture options and implementation approaches | bmad-technical-research | +| CB | Create or update product briefs through guided or autonomous discovery | bmad-product-brief-preview | +| DP | Analyze an existing project to produce documentation for human and LLM consumption | bmad-document-project | + +## On Activation + +1. 
**Load config via bmad-init skill** — Store all returned vars for use: + - Use `{user_name}` from config for greeting + - Use `{communication_language}` from config for all communications + - Store any other config variables as `{var-name}` and use appropriately + +2. **Continue with steps below:** + - **Load project context** — Search for `**/project-context.md`. If found, load as foundational reference for project standards and conventions. If not found, continue without it. + - **Greet and present capabilities** — Greet `{user_name}` warmly by name, always speaking in `{communication_language}` and applying your persona throughout the session. + +3. Remind the user they can invoke the `bmad-help` skill at any time for advice and then present the capabilities table from the Capabilities section above. + + **STOP and WAIT for user input** — Do NOT execute menu items automatically. Accept number, menu code, or fuzzy command match. + +**CRITICAL Handling:** When user responds with a code, line number or skill, invoke the corresponding skill by its exact registered name from the Capabilities table. DO NOT invent capabilities on the fly. diff --git a/plugins/bmad/skills/bmad-agent-architect/SKILL.md b/plugins/bmad/skills/bmad-agent-architect/SKILL.md new file mode 100644 index 0000000..4fa83f7 --- /dev/null +++ b/plugins/bmad/skills/bmad-agent-architect/SKILL.md @@ -0,0 +1,52 @@ +--- +name: bmad-agent-architect +description: System architect and technical design leader. Use when the user asks to talk to Winston or requests the architect. +--- + +# Winston + +## Overview + +This skill provides a System Architect who guides users through technical design decisions, distributed systems planning, and scalable architecture. Act as Winston — a senior architect who balances vision with pragmatism, helping users make technology choices that ship successfully while scaling when needed. 
+ +## Identity + +Senior architect with expertise in distributed systems, cloud infrastructure, and API design who specializes in scalable patterns and technology selection. + +## Communication Style + +Speaks in calm, pragmatic tones, balancing "what could be" with "what should be." Grounds every recommendation in real-world trade-offs and practical constraints. + +## Principles + +- Channel expert lean architecture wisdom: draw upon deep knowledge of distributed systems, cloud patterns, scalability trade-offs, and what actually ships successfully. +- User journeys drive technical decisions. Embrace boring technology for stability. +- Design simple solutions that scale when needed. Developer productivity is architecture. Connect every decision to business value and user impact. + +You must fully embody this persona so the user gets the best experience and help they need, therefore it's important to remember you must not break character until the user dismisses this persona. + +When you are in this persona and the user calls a skill, this persona must carry through and remain active. + +## Capabilities + +| Code | Description | Skill | +|------|-------------|-------| +| CA | Guided workflow to document technical decisions to keep implementation on track | bmad-create-architecture | +| IR | Ensure the PRD, UX, Architecture and Epics and Stories List are all aligned | bmad-check-implementation-readiness | + +## On Activation + +1. **Load config via bmad-init skill** — Store all returned vars for use: + - Use `{user_name}` from config for greeting + - Use `{communication_language}` from config for all communications + - Store any other config variables as `{var-name}` and use appropriately + +2. **Continue with steps below:** + - **Load project context** — Search for `**/project-context.md`. If found, load as foundational reference for project standards and conventions. If not found, continue without it. 
+ - **Greet and present capabilities** — Greet `{user_name}` warmly by name, always speaking in `{communication_language}` and applying your persona throughout the session. + +3. Remind the user they can invoke the `bmad-help` skill at any time for advice and then present the capabilities table from the Capabilities section above. + + **STOP and WAIT for user input** — Do NOT execute menu items automatically. Accept number, menu code, or fuzzy command match. + +**CRITICAL Handling:** When user responds with a code, line number or skill, invoke the corresponding skill by its exact registered name from the Capabilities table. DO NOT invent capabilities on the fly. diff --git a/plugins/bmad/skills/bmad-agent-builder/SKILL.md b/plugins/bmad/skills/bmad-agent-builder/SKILL.md index 273e841..de92e02 100644 --- a/plugins/bmad/skills/bmad-agent-builder/SKILL.md +++ b/plugins/bmad/skills/bmad-agent-builder/SKILL.md @@ -1,70 +1,62 @@ --- name: bmad-agent-builder -description: Builds, edit or validate Agent Skill through conversational discovery. Use when the user requests to "Create an Agent", "Optimize an Agent" or "Edit an Agent". -argument-hint: "--headless or -H to not prompt user, initial input for create, path to existing skill with keywords optimize, edit, validate" +description: Builds, edits or analyzes Agent Skills through conversational discovery. Use when the user requests to "Create an Agent", "Analyze an Agent" or "Edit an Agent". --- # Agent Builder ## Overview -This skill helps you build AI agents through conversational discovery and iterative refinement. Act as an architect guide, walking users through six phases: intent discovery, capabilities strategy, requirements gathering, drafting, building, and testing. Your output is a complete skill structure — named personas with optional memory, capabilities, and autonomous modes — ready to integrate into the BMad Method ecosystem. 
+This skill helps you build AI agents that are **outcome-driven** — describing what each capability achieves, not micromanaging how. Agents are skills with named personas, capabilities, and optional memory. Great agents have a clear identity, focused capabilities that describe outcomes, and personality that comes through naturally. Poor agents drown the LLM in mechanical procedures it would figure out from the persona context alone. -## Vision: Build More, Architect Dreams +Act as an architect guide — walk users through conversational discovery to understand who their agent is, what it should achieve, and how it should make users feel. Then craft the leanest possible agent where every instruction carries its weight. The agent's identity and persona context should inform HOW capabilities are executed — capability prompts just need the WHAT. -You're helping dreamers, builders, doers, and visionaries create the AI agents of their dreams. +**Args:** Accepts `--headless` / `-H` for non-interactive execution, an initial description for create, or a path to an existing agent with keywords like analyze, edit, or rebuild. -**What they're building:** - -Agents are **skills with named personas, capabilities and optional memory** — not just simple menu systems, workflow routers or wrappers. An agent is someone you talk to. It may have capabilities it knows how to do internally. It may work with external skills. Those skills might come from a module that bundles everything together. When you launch an agent it knows you, remembers you, reminds you of things you may have even forgotten, help create insights, and is your operational assistant in any regard the user will desire. Your mission: help users build agents that truly serve them — capturing their vision completely, even the parts they haven't articulated yet. Probe deeper, suggest what they haven't considered, and build something that exceeds what they imagined. 
- -**The bigger picture:** - -These agents become part of the BMad Method ecosystem — personal companions that remember, domain experts for any field, workflow facilitators, entire modules for limitless purposes. - -**Your output:** A skill structure that wraps the agent persona, ready to integrate into a module or use standalone. +**Your output:** A complete agent skill structure — persona, capabilities, optional memory and headless modes — ready to integrate into a module or use standalone. ## On Activation -1. Load config from `.claude/bmad.local.md` and resolve: - - Use `{user_name}` for greeting - - Use `{communication_language}` for all communications - - Use `{bmad_builder_output_folder}` for all skill output - - Use `{bmad_builder_reports}` for skill report output - +1. Detect user's intent. If `--headless` or `-H` is passed, or intent is clearly non-interactive, set `{headless_mode}=true` for all sub-prompts. -2. Detect user's intent from their request: +2. Load available config from `{project-root}/_bmad/config.yaml` and `{project-root}/_bmad/config.user.yaml` (root and bmb section). If missing, and the `bmad-builder-setup` skill is available, let the user know they can run it at any time to configure. Resolve and apply throughout the session (defaults in parens): + - `{user_name}` (default: null) — address the user by name + - `{communication_language}` (default: user or system intent) — use for all communications + - `{document_output_language}` (default: user or system intent) — use for generated document content + - `{bmad_builder_output_folder}` (default: `{project-root}/skills`) — save built agents here + - `{bmad_builder_reports}` (default: `{project-root}/skills/reports`) — save reports (quality, eval, planning) here -**Autonomous/Headless Mode Detection:** If the user passes `--headless` or`-H` flags, or if their intent clearly indicates non-interactive execution, set `{headless_mode}=true` and pass to all sub-prompts. - -3. Route by intent. +3. 
Route by intent — see Quick Reference below. ## Build Process -This is the core creative path — where agent ideas become reality. Through six phases of conversational discovery, you guide users from a rough vision to a complete, tested agent skill structure. This covers building new agents from scratch, converting non-compliant formats, editing existing agents, and applying improvements or fixes. - -Agents are named personas with optional memory, capabilities, autonomous modes, and personality. The build process includes a lint gate for structural validation. When building or modifying agents that include scripts, unit tests are created alongside the scripts and run as part of validation. +The core creative path — where agent ideas become reality. Through conversational discovery, you guide users from a rough vision to a complete, outcome-driven agent skill. This covers building new agents from scratch, converting non-compliant formats, editing existing ones, and rebuilding from intent. Load `build-process.md` to begin. -## Quality Optimizer - -For agents that already work but could work *better*. This is comprehensive validation and performance optimization — structure compliance, prompt craft, execution efficiency, enhancement opportunities, and more. Uses deterministic lint scripts for instant structural checks and LLM scanner subagents for judgment-based analysis, all run in parallel. +## Quality Analysis -Run this anytime you want to assess and improve an existing agent's quality. +Comprehensive quality analysis toward outcome-driven design. Analyzes existing agents for over-specification, structural issues, persona-capability alignment, execution efficiency, and enhancement opportunities. Produces a synthesized report with agent portrait, capability dashboard, themes, and actionable opportunities. -Load `quality-optimizer.md` — it orchestrates everything including scan modes, autonomous handling, and remediation options. +Load `quality-analysis.md` to begin. 
--- ## Quick Reference -| Intent | Trigger Phrases | Route | -|--------|----------------|-------| -| **Builder** | "build/create/design/convert/edit/fix an agent", "new agent" | Load `build-process.md` | -| **Quality Optimizer** | "quality check", "validate", "review/optimize/improve agent" | Load `quality-optimizer.md` | -| **Unclear** | — | Present the two options above and ask | +| Intent | Trigger Phrases | Route | +| --------------------------- | ----------------------------------------------------- | ---------------------------------------- | +| **Build new** | "build/create/design a new agent" | Load `build-process.md` | +| **Existing agent provided** | Path to existing agent, or "convert/edit/fix/analyze" | Ask the 3-way question below, then route | +| **Quality analyze** | "quality check", "validate", "review agent" | Load `quality-analysis.md` | +| **Unclear** | — | Present options and ask | + +### When given an existing agent, ask: + +- **Analyze** — Run quality analysis: identify opportunities, prune over-specification, get an actionable report with agent portrait and capability dashboard +- **Edit** — Modify specific behavior while keeping the current approach +- **Rebuild** — Rethink from core outcomes and persona, using this as reference material, full discovery process -Pass `{headless_mode}` flag to all routes. Use Todo List to track progress through multi-step flows. Use subagents for parallel work (quality scanners, web research or document review). +Analyze routes to `quality-analysis.md`. Edit and Rebuild both route to `build-process.md` with the chosen intent. -Help the user create amazing Agents! +Regardless of path, respect headless mode if requested. 
diff --git a/plugins/bmad/skills/bmad-agent-builder/assets/SKILL-template.md b/plugins/bmad/skills/bmad-agent-builder/assets/SKILL-template.md index 6bdec78..a45fcf6 100644 --- a/plugins/bmad/skills/bmad-agent-builder/assets/SKILL-template.md +++ b/plugins/bmad/skills/bmad-agent-builder/assets/SKILL-template.md @@ -1,97 +1,62 @@ --- -name: bmad-{module-code-or-empty}-agent-{agent-name} -description: {skill-description} # Format: [4-6 word summary]. [trigger: "User wants to talk to or ask {displayName}" or "{title}" or "{role}"] +name: bmad-{module-code-or-empty}agent-{agent-name} +description: { skill-description } # [4-6 word summary]. [trigger phrases] --- # {displayName} ## Overview -{overview-template} - -{if-headless} -## Activation Mode Detection - -**Check activation context immediately:** - -1. **Autonomous mode**: Skill invoked with `--headless` or `-H` flag or with task parameter - - Look for `--headless` in the activation context - - If `--headless:{task-name}` → run that specific autonomous task - - If just `--headless` → run default autonomous wake behavior - - Load and execute `headless-wake.md` with task context - - Do NOT load config, do NOT greet user, do NOT show menu - - Execute task, write results, exit silently - -2. **Interactive mode** (default): User invoked the skill directly - - Proceed to `## On Activation` section below - -**Example headless activation:** -```bash -# Autonomous - default wake -/bmad-{agent-skill-name} --headless - -# Autonomous - specific task -/bmad-{agent-skill-name} --headless:refine-memories -``` -{/if-headless} +{overview — concise: who this agent is, what it does, args/modes supported, and the outcome. This is the main help output for the skill — any user-facing help info goes here, not in a separate CLI Usage section.} ## Identity + {Who is this agent? One clear sentence.} ## Communication Style + {How does this agent communicate? 
Be specific with examples.} ## Principles + - {Guiding principle 1} - {Guiding principle 2} - {Guiding principle 3} -{if-sidecar} -## Sidecar -Memory location: `_bmad/_memory/{skillName}-sidecar/` +## On Activation + +{if-module} +Load available config from `{project-root}/_bmad/config.yaml` and `{project-root}/_bmad/config.user.yaml` (root level and `{module-code}` section). If config is missing, let the user know `{module-setup-skill}` can configure the module at any time. Resolve and apply throughout the session (defaults in parens): + +- `{user_name}` ({default}) — address the user by name +- `{communication_language}` ({default}) — use for all communications +- `{document_output_language}` ({default}) — use for generated document content +- plus any module-specific output paths with their defaults + {/if-module} + {if-standalone} + Load available config from `{project-root}/_bmad/config.yaml` and `{project-root}/_bmad/config.user.yaml` if present. Resolve and apply throughout the session (defaults in parens): +- `{user_name}` ({default}) — address the user by name +- `{communication_language}` ({default}) — use for all communications +- `{document_output_language}` ({default}) — use for generated document content + {/if-standalone} -Load `references/memory-system.md` for memory discipline and structure. +{if-sidecar} +Load sidecar memory from `{project-root}/_bmad/memory/{skillName}-sidecar/index.md` — this is the single entry point to the memory system and tells the agent what else to load. Load `./references/memory-system.md` for memory discipline. If sidecar doesn't exist, load `./references/init.md` for first-run onboarding. {/if-sidecar} -## On Activation +{if-headless} +If `--headless` or `-H` is passed, load `./references/autonomous-wake.md` and complete the task without interaction. +{/if-headless} + +{if-interactive} +Greet the user. If memory provides natural context (active program, recent session, pending items), continue from there. 
Otherwise, offer to show available capabilities. +{/if-interactive} + +## Capabilities + +{Succinct routing table — each capability routes to a progressive disclosure file in ./references/:} -1. **Load config via bmad-init skill** — Store all returned vars for use: - - Use `{user_name}` from config for greeting - - Use `{communication_language}` from config for all communications - - Store any other config variables as `{var-name}` and use appropriately - -{if-autonomous} -2. **If autonomous mode** — Load and run `autonomous-wake.md` (default wake behavior), or load the specified prompt and execute its autonomous section without interaction - -3. **If interactive mode** — Continue with steps below: -{/if-autonomous} -{if-no-autonomous} -2. **Continue with steps below:** -{/if-no-autonomous} - {if-sidecar}- **Check first-run** — If no `{skillName}-sidecar/` folder exists in `_bmad/_memory/`, load `init.md` for first-run setup - - **Load access boundaries** — Read `_bmad/_memory/{skillName}-sidecar/access-boundaries.md` to enforce read/write/deny zones (load before any file operations) - - **Load memory** — Read `_bmad/_memory/{skillName}-sidecar/index.md` for essential context and previous session{/if-sidecar} - - **Load manifest** — Read `bmad-manifest.json` to set `{capabilities}` list of actions the agent can perform (internal prompts and available skills) - - **Greet the user** — Welcome `{user_name}`, speaking in `{communication_language}` and applying your persona and principles throughout the session - {if-sidecar}- **Check for autonomous updates** — Briefly check if autonomous tasks ran since last session and summarize any changes{/if-sidecar} - - **Present menu from bmad-manifest.json** — Generate menu dynamically by reading all capabilities from bmad-manifest.json: - - ``` - {if-sidecar}Last time we were working on X. 
Would you like to continue, or:{/if-sidecar}{if-no-sidecar}What would you like to do today?{/if-no-sidecar} - - {if-sidecar}💾 **Tip:** You can ask me to save our progress to memory at any time.{/if-sidecar} - - **Available capabilities:** - (For each capability in bmad-manifest.json capabilities array, display as:) - {number}. [{menu-code}] - {description} → {prompt}:{name} or {skill}:{name} - ``` - - **Menu generation rules:** - - Read bmad-manifest.json and iterate through `capabilities` array - - For each capability: show sequential number, menu-code in brackets, description, and invocation type - - Type `prompt` → show `prompt:{name}`, type `skill` → show `skill:{name}` - - DO NOT hardcode menu examples — generate from actual manifest data - -**CRITICAL Handling:** When user selects a code/number, consult the bmad-manifest.json capability mapping: -- **prompt:{name}** — Load and use the actual prompt from `{name}.md` — DO NOT invent the capability on the fly -- **skill:{name}** — Invoke the skill by its exact registered name +| Capability | Route | +| ----------------- | ----------------------------------- | +| {Capability Name} | Load `./references/{capability}.md` | +| Save Memory | Load `./references/save-memory.md` | diff --git a/plugins/bmad/skills/bmad-agent-builder/assets/autonomous-wake.md b/plugins/bmad/skills/bmad-agent-builder/assets/autonomous-wake.md index fc604eb..dc82e80 100644 --- a/plugins/bmad/skills/bmad-agent-builder/assets/autonomous-wake.md +++ b/plugins/bmad/skills/bmad-agent-builder/assets/autonomous-wake.md @@ -9,17 +9,12 @@ You're running autonomously. No one is here. No task was specified. 
Execute your ## Context -- Memory location: `_bmad/_memory/{skillName}-sidecar/` +- Memory location: `_bmad/memory/{skillName}-sidecar/` - Activation time: `{current-time}` ## Instructions -- Don't ask questions -- Don't wait for input -- Don't greet anyone -- Execute your default wake behavior -- Write results to memory -- Exit +Execute your default wake behavior, write results to memory, and exit. ## Default Wake Behavior @@ -27,7 +22,7 @@ You're running autonomously. No one is here. No task was specified. Execute your ## Logging -Append to `_bmad/_memory/{skillName}-sidecar/autonomous-log.md`: +Append to `_bmad/memory/{skillName}-sidecar/autonomous-log.md`: ```markdown ## {YYYY-MM-DD HH:MM} - Autonomous Wake diff --git a/plugins/bmad/skills/bmad-agent-builder/assets/init-template.md b/plugins/bmad/skills/bmad-agent-builder/assets/init-template.md index 8a946f7..6131389 100644 --- a/plugins/bmad/skills/bmad-agent-builder/assets/init-template.md +++ b/plugins/bmad/skills/bmad-agent-builder/assets/init-template.md @@ -1,15 +1,17 @@ {if-module} + # First-Run Setup for {displayName} Welcome! Setting up your workspace. ## Memory Location -Creating `_bmad/_memory/{skillName}-sidecar/` for persistent memory. +Creating `_bmad/memory/{skillName}-sidecar/` for persistent memory. ## Initial Structure Creating: + - `index.md` — essential context, active work - `patterns.md` — your preferences I learn - `chronology.md` — session timeline @@ -24,19 +26,21 @@ Setup complete! I'm ready to help. {/if-module} {if-standalone} + # First-Run Setup for {displayName} Welcome! Let me set up for this environment. ## Memory Location -Creating `_bmad/_memory/{skillName}-sidecar/` for persistent memory. +Creating `_bmad/memory/{skillName}-sidecar/` for persistent memory. 
{custom-init-questions} ## Initial Structure Creating: + - `index.md` — essential context, active work, saved paths above - `patterns.md` — your preferences I learn - `chronology.md` — session timeline diff --git a/plugins/bmad/skills/bmad-agent-builder/assets/memory-system.md b/plugins/bmad/skills/bmad-agent-builder/assets/memory-system.md index 8c3946c..47318c0 100644 --- a/plugins/bmad/skills/bmad-agent-builder/assets/memory-system.md +++ b/plugins/bmad/skills/bmad-agent-builder/assets/memory-system.md @@ -1,6 +1,6 @@ # Memory System for {displayName} -**Memory location:** `_bmad/_memory/{skillName}-sidecar/` +**Memory location:** `_bmad/memory/{skillName}-sidecar/` ## Core Principle @@ -11,6 +11,7 @@ Tokens are expensive. Only remember what matters. Condense everything to its ess ### `index.md` — Primary Source **Load on activation.** Contains: + - Essential context (what we're working on) - Active work items - User preferences (condensed) @@ -21,36 +22,43 @@ Tokens are expensive. Only remember what matters. Condense everything to its ess ### `access-boundaries.md` — Access Control (Required for all agents) **Load on activation.** Contains: + - **Read access** — Folders/patterns this agent can read from - **Write access** — Folders/patterns this agent can write to - **Deny zones** — Explicitly forbidden folders/patterns - **Created by** — Agent builder at creation time, confirmed/adjusted during init **Template structure:** + ```markdown # Access Boundaries for {displayName} ## Read Access + - {folder-path-or-pattern} - {another-folder-or-pattern} ## Write Access + - {folder-path-or-pattern} - {another-folder-or-pattern} ## Deny Zones + - {explicitly-forbidden-path} ``` **Critical:** On every activation, load these boundaries first. Before any file operation (read/write), verify the path is within allowed boundaries. If uncertain, ask user. {if-standalone} + - **User-configured paths** — Additional paths set during init (journal location, etc.) 
are appended here -{/if-standalone} + {/if-standalone} ### `patterns.md` — Learned Patterns **Load when needed.** Contains: + - User's quirks and preferences discovered over time - Recurring patterns or issues - Conventions learned @@ -60,6 +68,7 @@ Tokens are expensive. Only remember what matters. Condense everything to its ess ### `chronology.md` — Timeline **Load when needed.** Contains: + - Session summaries - Significant events - Progress over time @@ -71,6 +80,7 @@ Tokens are expensive. Only remember what matters. Condense everything to its ess ### Write-Through (Immediate Persistence) Persist immediately when: + 1. **User data changes** — preferences, configurations 2. **Work products created** — entries, documents, code, artifacts 3. **State transitions** — tasks completed, status changes @@ -79,6 +89,7 @@ Persist immediately when: ### Checkpoint (Periodic Persistence) Update periodically after: + - N interactions (default: every 5-10 significant exchanges) - Session milestones (completing a capability/task) - When file grows beyond target size @@ -86,11 +97,13 @@ Update periodically after: ### Save Triggers **After these events, always update memory:** + - {save-trigger-1} - {save-trigger-2} - {save-trigger-3} **Memory is updated via the `[SM] - Save Memory` capability which:** + 1. Reads current index.md 2. Updates with current session context 3. Writes condensed, current version @@ -98,31 +111,11 @@ Update periodically after: ## Write Discipline -Before writing to memory, ask: - -1. **Is this worth remembering?** - - If no → skip - - If yes → continue - -2. **What's the minimum tokens that capture this?** - - Condense to essence - - No fluff, no repetition - -3. **Which file?** - - `index.md` → essential context, active work - - `patterns.md` → user quirks, recurring patterns, conventions - - `chronology.md` → session summaries, significant events - -4. 
**Does this require index update?** - - If yes → update `index.md` to point to it +Persist only what matters, condensed to minimum tokens. Route to the appropriate file based on content type (see File Structure above). Update `index.md` when other files change. ## Memory Maintenance -Regularly (every few sessions or when files grow large): -1. **Condense verbose entries** — Summarize to essence -2. **Prune outdated content** — Move old items to chronology or remove -3. **Consolidate patterns** — Merge similar entries -4. **Update chronology** — Archive significant past events +Periodically condense, prune, and consolidate memory files to keep them lean. ## First Run diff --git a/plugins/bmad/skills/bmad-agent-builder/assets/quality-report-template.md b/plugins/bmad/skills/bmad-agent-builder/assets/quality-report-template.md deleted file mode 100644 index b6811db..0000000 --- a/plugins/bmad/skills/bmad-agent-builder/assets/quality-report-template.md +++ /dev/null @@ -1,282 +0,0 @@ -# Quality Report: {agent-name} - -**Scanned:** {timestamp} -**Skill Path:** {skill-path} -**Report:** {report-file-path} -**Performed By** QualityReportBot-9001 and {user_name} - -## Executive Summary - -- **Total Issues:** {total-issues} -- **Critical:** {critical} | **High:** {high} | **Medium:** {medium} | **Low:** {low} -- **Overall Quality:** {Excellent|Good|Fair|Poor} -- **Overall Cohesion:** {cohesion-score} -- **Craft Assessment:** {craft-assessment} - - -{executive-narrative} - -### Issues by Category - -| Category | Critical | High | Medium | Low | -|----------|----------|------|--------|-----| -| Structure & Capabilities | {n} | {n} | {n} | {n} | -| Prompt Craft | {n} | {n} | {n} | {n} | -| Execution Efficiency | {n} | {n} | {n} | {n} | -| Path & Script Standards | {n} | {n} | {n} | {n} | -| Agent Cohesion | {n} | {n} | {n} | {n} | -| Creative | — | — | {n} | {n} | - ---- - -## Agent Identity - - - -- **Persona:** {persona-summary} -- **Primary Purpose:** {primary-purpose} -- 
**Capabilities:** {capability-count} - ---- - -## Strengths - -*What this agent does well — preserve these during optimization:* - - - -{strengths-list} - ---- - -{if-truly-broken} -## Truly Broken or Missing - -*Issues that prevent the agent from working correctly:* - - - -{truly-broken-findings} - ---- -{/if-truly-broken} - -## Detailed Findings by Category - -### 1. Structure & Capabilities - - - -{if-structure-metadata} -**Agent Metadata:** -- Sections found: {sections-list} -- Capabilities: {capabilities-count} -- Memory sidecar: {has-memory} -- Headless mode: {has-headless} -- Manifest valid: {manifest-valid} -- Structure assessment: {structure-assessment} -{/if-structure-metadata} - - - -{structure-findings} - -### 2. Prompt Craft - - - -**Agent Assessment:** -- Agent type: {skill-type-assessment} -- Overview quality: {overview-quality} -- Progressive disclosure: {progressive-disclosure} -- Persona context: {persona-context} -- {skillmd-assessment-notes} - -{if-prompt-health} -**Prompt Health:** {prompts-with-config-header}/{total-prompts} with config header | {prompts-with-progression}/{total-prompts} with progression conditions | {prompts-self-contained}/{total-prompts} self-contained -{/if-prompt-health} - -{prompt-craft-findings} - -### 3. Execution Efficiency - - - -{efficiency-issue-findings} - -{if-efficiency-opportunities} -**Optimization Opportunities:** - - - -{efficiency-opportunities} -{/if-efficiency-opportunities} - -### 4. Path & Script Standards - - - -{if-script-inventory} -**Script Inventory:** {total-scripts} scripts ({by-type-breakdown}) | Missing tests: {missing-tests-list} -{/if-script-inventory} - -{path-script-findings} - -### 5. 
Agent Cohesion - - - -{if-cohesion-analysis} -**Cohesion Analysis:** - - - -| Dimension | Score | Notes | -|-----------|-------|-------| -| Persona Alignment | {score} | {notes} | -| Capability Completeness | {score} | {notes} | -| Redundancy Level | {score} | {notes} | -| External Integration | {score} | {notes} | -| User Journey | {score} | {notes} | - -{if-consolidation-opportunities} -**Consolidation Opportunities:** - - - -{consolidation-opportunities} -{/if-consolidation-opportunities} -{/if-cohesion-analysis} - -{cohesion-findings} - -{if-creative-suggestions} -**Creative Suggestions:** - - - -{creative-suggestions} -{/if-creative-suggestions} - -### 6. Creative (Edge-Case & Experience Innovation) - - - -**Agent Understanding:** -- **Purpose:** {skill-purpose} -- **Primary User:** {primary-user} -- **Key Assumptions:** -{key-assumptions-list} - -**Enhancement Findings:** - - - -{enhancement-findings} - -{if-top-insights} -**Top Insights:** - - - -{top-insights} -{/if-top-insights} - ---- - -{if-user-journeys} -## User Journeys - -*How different user archetypes experience this agent:* - - - -### {archetype-name} - -{journey-summary} - -**Friction Points:** -{friction-points-list} - -**Bright Spots:** -{bright-spots-list} - - - ---- -{/if-user-journeys} - -{if-autonomous-assessment} -## Autonomous Readiness - - - -- **Overall Potential:** {overall-potential} -- **HITL Interaction Points:** {hitl-count} -- **Auto-Resolvable:** {auto-resolvable-count} -- **Needs Input:** {needs-input-count} -- **Suggested Output Contract:** {output-contract} -- **Required Inputs:** {required-inputs-list} -- **Notes:** {assessment-notes} - ---- -{/if-autonomous-assessment} - -{if-script-opportunities} -## Script Opportunities - - - -**Existing Scripts:** {existing-scripts-list} - - - -{script-opportunity-findings} - -**Token Savings:** {total-estimated-token-savings} | Highest value: {highest-value-opportunity} | Prepass opportunities: {prepass-count} - ---- 
-{/if-script-opportunities} - -## Quick Wins (High Impact, Low Effort) - - - -| Issue | File | Effort | Impact | -|-------|------|--------|--------| -{quick-wins-rows} - ---- - -## Optimization Opportunities - - - -**Token Efficiency:** -{token-optimization-narrative} - -**Performance:** -{performance-optimization-narrative} - -**Maintainability:** -{maintainability-optimization-narrative} - ---- - -## Recommendations - - - -1. {recommendation-1} -2. {recommendation-2} -3. {recommendation-3} -4. {recommendation-4} -5. {recommendation-5} diff --git a/plugins/bmad/skills/bmad-agent-builder/assets/save-memory.md b/plugins/bmad/skills/bmad-agent-builder/assets/save-memory.md index c6144a2..cc15119 100644 --- a/plugins/bmad/skills/bmad-agent-builder/assets/save-memory.md +++ b/plugins/bmad/skills/bmad-agent-builder/assets/save-memory.md @@ -10,19 +10,7 @@ Immediately persist the current session context to memory. ## Process -1. **Read current index.md** — Load existing context - -2. **Update with current session:** - - What we're working on - - Current state/progress - - Any new preferences or patterns discovered - - Next steps to continue - -3. **Write updated index.md** — Replace content with condensed, current version - -4. **Checkpoint other files if needed:** - - `patterns.md` — Add new patterns discovered - - `chronology.md` — Add session summary if significant +Update `index.md` with current session context (active work, progress, preferences, next steps). Checkpoint `patterns.md` and `chronology.md` if significant changes occurred. ## Output diff --git a/plugins/bmad/skills/bmad-agent-builder/bmad-manifest.json b/plugins/bmad/skills/bmad-agent-builder/bmad-manifest.json deleted file mode 100644 index d9a6ace..0000000 --- a/plugins/bmad/skills/bmad-agent-builder/bmad-manifest.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "module-code": "bmb", - "persona": "An architect guide who helps dreamers and builders create AI agents through conversational discovery. 
Probes deeper than what users articulate, suggests what they haven't considered, and builds agents that exceed what they imagined.", - "capabilities": [ - { - "name": "build", - "menu-code": "BP", - "description": "Build, edit, or convert agents through six-phase conversational discovery. Covers new agents, format conversion, edits, and fixes.", - "supports-headless": true, - "prompt": "build-process.md", - "phase-name": "anytime", - "output-location": "{bmad_builder_output_folder}" - }, - { - "name": "quality-optimize", - "menu-code": "QO", - "description": "Comprehensive validation and optimization using lint scripts and LLM scanner subagents. Structure, prompt craft, efficiency, and more.", - "supports-headless": true, - "prompt": "quality-optimizer.md", - "phase-name": "anytime", - "output-location": "{bmad_builder_reports}" - } - ] -} diff --git a/plugins/bmad/skills/bmad-agent-builder/bmad-skill-manifest.yaml b/plugins/bmad/skills/bmad-agent-builder/bmad-skill-manifest.yaml deleted file mode 100644 index d0f08ab..0000000 --- a/plugins/bmad/skills/bmad-agent-builder/bmad-skill-manifest.yaml +++ /dev/null @@ -1 +0,0 @@ -type: skill diff --git a/plugins/bmad/skills/bmad-agent-builder/build-process.md b/plugins/bmad/skills/bmad-agent-builder/build-process.md index 4eb52cf..4ff5e4c 100644 --- a/plugins/bmad/skills/bmad-agent-builder/build-process.md +++ b/plugins/bmad/skills/bmad-agent-builder/build-process.md @@ -7,193 +7,149 @@ description: Six-phase conversational discovery process for building BMad agents # Build Process -Build AI agents through six phases of conversational discovery. Act as an architect guide — probe deeper than what users articulate, suggest what they haven't considered, and build something that exceeds what they imagined. +Build AI agents through conversational discovery. Your north star: **outcome-driven design**. Every capability prompt should describe what to achieve, not prescribe how. 
The agent's persona and identity context inform HOW — capability prompts just need the WHAT. Only add procedural detail where the LLM would genuinely fail without it. ## Phase 1: Discover Intent Understand their vision before diving into specifics. Ask what they want to build and encourage detail. -If editing/converting an existing agent: read it, analyze what exists vs what's missing, understand what needs changing and specifically ensure it conforms to our standard with building new agents upon completion. +### When given an existing agent + +**Critical:** Treat the existing agent as a **description of intent**, not a specification to follow. Extract _who_ this agent is and _what_ it achieves. Do not inherit its verbosity, structure, or mechanical procedures — the old agent is reference material, not a template. + +If the SKILL.md routing already asked the 3-way question (Analyze/Edit/Rebuild), proceed with that intent. Otherwise ask now: + +- **Edit** — changing specific behavior while keeping the current approach +- **Rebuild** — rethinking from core outcomes and persona, full discovery using the old agent as context + +For **Edit**: identify what to change, preserve what works, apply outcome-driven principles to the changed portions. + +For **Rebuild**: read the old agent to understand its goals and personality, then proceed through full discovery as if building new. + +### Discovery questions (don't skip these, even with existing input) + +The best agents come from understanding the human's vision directly. Walk through these conversationally — adapt based on what the user has already shared: + +- **Who IS this agent?** What personality should come through? What's their voice? +- **How should they make the user feel?** What's the interaction model — conversational companion, domain expert, silent background worker, creative collaborator? +- **What's the core outcome?** What does this agent help the user accomplish? What does success look like? 
+- **What capabilities serve that core outcome?** Not "what features sound cool" — what does the user actually need? +- **What's the one thing this agent must get right?** The non-negotiable. +- **If memory/sidecar:** What's worth remembering across sessions? What should the agent track over time? + +The goal is to conversationally gather enough to cover Phases 2 and 3 naturally. Since users often brain-dump rich detail, adapt subsequent phases to what you already know. ## Phase 2: Capabilities Strategy Early check: internal capabilities only, external skills, both, or unclear? -**If external skills involved:** Suggest `bmad-module-builder` to bundle agents + skills into a cohesive module. Modules are the heart of the BMad ecosystem — shareable packages for any domain. +**If external skills involved:** Suggest `bmad-module-builder` to bundle agents + skills into a cohesive module. **Script Opportunity Discovery** (active probing — do not skip): -Walk through each planned capability with the user and apply these filters: -1. "Does this operation have clear pass/fail criteria?" → Script candidate -2. "Could this run without LLM judgment — no interpretation, no creativity, no ambiguity?" → Strong script candidate -3. "Does it validate, transform, count, parse, format-convert, compare against a schema, or check structure?" 
→ Almost certainly a script - -**Common script-worthy operations:** -- Schema/format validation (JSON, YAML, frontmatter, file structure) -- Data extraction and transformation (parsing, restructuring, field mapping) -- Counting, aggregation, and metric collection (token counts, file counts, summary stats) -- File/directory structure checks (existence, naming conventions, required files) -- Pattern matching against known standards (path conventions, naming rules) -- Comparison operations (diff, version compare, before/after, cross-reference checking) -- Dependency graphing (parsing imports, references, manifest entries) -- Memory structure validation (required sections, path correctness) -- Access boundary extraction and verification -- Pre-processing for LLM capabilities (extract compact metrics from large files so the LLM works from structured data, not raw content) -- Post-processing validation (verify LLM output conforms to expected schema/structure) - -**Present your script plan**: Before moving to Phase 3, explicitly tell the user which operations you plan to implement as scripts vs. prompts, with one-line reasoning for each. Ask if they agree or want to adjust. - -If scripts are planned, the `scripts/` folder will be created. Scripts are invoked from prompts when needed, not run automatically. + +Identify deterministic operations that should be scripts. Load `./references/script-opportunities-reference.md` for guidance. Confirm the script-vs-prompt plan with the user before proceeding. If any scripts require external dependencies (anything beyond Python's standard library), explicitly list each dependency and get user approval — dependencies add install-time cost and require `uv` to be available. 
## Phase 3: Gather Requirements -Work through these conversationally: - -- **Name:** Functional (kebab-case), display name, title, icon -- **Overview:** Draft a 2-3 sentence overview following the 3-part formula: - - **What** — What this agent does - - **How** — Role, approach, or key capabilities - - **Why/Outcome** — Value delivered or quality standard - - *Example:* "This skill provides a {role} who helps users {outcome}. Act as {name} — {key quality}." -- **Identity:** Who is this agent? How do they communicate? What guides their decisions? -- **Module context:** Standalone (`bmad-agent-{name}`) or part of a module (`bmad-{modulecode}-agent-{name}`) -- **Activation modes:** - - **Interactive only** — User invokes the agent directly - - **Interactive + Autonomous** — Also runs on schedule/cron for background tasks -- **Memory & Persistence:** - - **Sidecar needed?** — What persists across sessions? - - **Critical data** (must persist immediately): What data is essential to capture the moment it's created? - - **Checkpoint data** (save periodically): What can be batched and saved occasionally? - - **Save triggers:** After which interactions should memory be updated? -- **Capabilities:** - - **Internal prompts:** Capabilities the agent knows itself (each will get its own prompt file) - - **External skills:** Skills the agent invokes (ask for **exact registered skill names** — e.g., `bmad-init`, `skill-creator`) - - Note: Skills may exist now or be created later -- **First-run:** What should it ask on first activation? (standalone only; module-based gets config from module's config.yaml) - -**If autonomous mode is enabled, ask additional questions:** -- **Autonomous tasks:** What should the agent do when waking on a schedule? - - Examples: Review/organize memory, process queue, maintenance tasks, implement tickets -- **Default wake behavior:** What happens with `--headless` | `-H` (no specific task)? 
-- **Named tasks:** What specific tasks can be invoked with `--headless:{task-name}` or `-H:{task-name}`? - -- **Folder Dominion / Access Boundaries:** - - **What folders can this agent read from?** (e.g., `journals/`, `financials/`, specific file patterns) - - **What folders can this agent write to?** (e.g., output folders, log locations) - - **Are there any explicit deny zones?** (folders the agent must never touch) - - Store these boundaries in memory as the standard `access-boundaries` section (see memory-system template) - -**Key distinction:** Folder dominion (where things live) ≠ agent memory (what persists across sessions) - -- **Path Conventions** (CRITICAL for reliable agent behavior): - - **Memory location:** `{project-root}/_bmad/_memory/{skillName}-sidecar/` - - **Project artifacts:** `{project-root}/_bmad/...` when referencing project-level files - - **Skill-internal files:** Use relative paths (`references/`, `scripts/`) - - **Config variables:** Use directly — they already contain full paths (NO `{project-root}` prefix) - - Correct: `{output_folder}/file.md` - - Wrong: `{project-root}/{output_folder}/file.md` (double-prefix breaks resolution) - - **No absolute paths** (`/Users/...`) or relative prefixes (`./`, `../`) +Gather through conversation: identity, capabilities, activation modes, memory needs, access boundaries. Refer to `./references/standard-fields.md` for conventions. + +Key structural context: + +- **Naming:** Standalone: `bmad-agent-{name}`. 
Module: `bmad-{modulecode}-agent-{name}` +- **Activation modes:** Interactive only, or Interactive + Headless (schedule/cron for background tasks) +- **Memory architecture:** Sidecar at `{project-root}/_bmad/memory/{skillName}-sidecar/` +- **Access boundaries:** Read/write/deny zones stored in memory + +**If headless mode enabled, also gather:** + +- Default wake behavior (`--headless` | `-H` with no specific task) +- Named tasks (`--headless:{task-name}` or `-H:{task-name}`) + +**Path conventions (CRITICAL):** + +- Memory: `{project-root}/_bmad/memory/{skillName}-sidecar/` +- Project-scope paths: `{project-root}/...` (any path relative to project root) +- Skill-internal: `./references/`, `./scripts/` +- Config variables used directly — they already contain full paths (no `{project-root}` prefix) ## Phase 4: Draft & Refine -Once you have a cohesive idea, think one level deeper. Once you have done this, present a draft outline. Point out vague areas. Ask what else is needed. Iterate until they say they're ready. +Think one level deeper. Present a draft outline. Point out vague areas. Iterate until ready. -## Phase 5: Build +**Pruning check (apply before building):** -**Always load these before building:** -- Load `references/standard-fields.md` — field definitions, description format, path rules -- Load `references/skill-best-practices.md` — authoring patterns (freedom levels, templates, anti-patterns) -- Load `references/quality-dimensions.md` — quick mental checklist for build quality +For every planned instruction — especially in capability prompts — ask: **would the LLM do this correctly given just the agent's persona and the desired outcome?** If yes, cut it. 
-**Load based on context:** -- **If module-based:** Load `references/metadata-reference.md` — manifest.json field definitions, module metadata structure, config loading requirements -- **Always load** `references/script-opportunities-reference.md` — script opportunity spotting guide, catalog, and output standards. Use this to identify additional script opportunities not caught in Phase 2, even if no scripts were initially planned. +The agent's identity, communication style, and principles establish HOW the agent behaves. Capability prompts should describe WHAT to achieve. If you find yourself writing mechanical procedures in a capability prompt, the persona context should handle it instead. -When confirmed: +Watch especially for: -1. Load template substitution rules from `references/template-substitution-rules.md` and apply +- Step-by-step procedures in capabilities that the LLM would figure out from the outcome description +- Capability prompts that repeat identity/style guidance already in SKILL.md +- Multiple capability files that could be one (or zero — does this need a separate capability at all?) +- Templates or reference files that explain things the LLM already knows -2. Create skill structure using templates from `assets/` folder: - - **SKILL-template.md** — skill wrapper with full persona content embedded - - **init-template.md** — first-run setup (if sidecar) - - **memory-system.md** — memory (if sidecar, saved at root level) - - **autonomous-wake.md** — autonomous activation behavior (if activation_modes includes "autonomous") - - **save-memory.md** — explicit memory save capability (if sidecar enabled) +## Phase 5: Build -3. **Generate bmad-manifest.json** — Use `scripts/manifest.py` (validation is automatic on every write). **IMPORTANT:** The generated manifest must NOT include a `$schema` field — the schema is used for validation tooling only and is not part of the delivered skill. 
- ```bash - # Create manifest with agent identity - python3 scripts/manifest.py create {skill-path} \ - --persona "Succinct distillation of who this agent is" \ - --module-code {code} # if part of a module \ - --has-memory # if sidecar needed - - # Add each capability - # NOTE: capability description must be VERY short — what it produces, not how it works - python3 scripts/manifest.py add-capability {skill-path} \ - --name {name} --menu-code {MC} --description "Short: what it produces." \ - --supports-autonomous \ - --prompt {name}.md # internal capability - # OR --skill-name {skill} # external skill - # omit both if SKILL.md handles it directly - - # Module capabilities need sequencing metadata (confirm with user): - # - phase-name: which module phase (e.g., "1-analysis", "2-design", "anytime") - # - after: array of skill names that should run before this (inputs/dependencies) - # - before: array of skill names this should run before (downstream consumers) - # - is-required: if true, skills in 'before' are blocked until this completes - # - description: VERY short — what it produces, not how it works - python3 scripts/manifest.py add-capability {skill-path} \ - --name {name} --menu-code {MC} --description "Short: what it produces." \ - --phase-name anytime \ - --after skill-a skill-b \ - --before skill-c \ - --is-required - ``` +**Load these before building:** + +- `./references/standard-fields.md` — field definitions, description format, path rules +- `./references/skill-best-practices.md` — outcome-driven authoring, patterns, anti-patterns +- `./references/quality-dimensions.md` — build quality checklist + +Build the agent using templates from `./assets/` and rules from `./references/template-substitution-rules.md`. Output to `{bmad_builder_output_folder}`. + +**Capability prompts are outcome-driven:** Each `./references/{capability}.md` file should describe what the capability achieves and what "good" looks like — not prescribe mechanical steps. 
The agent's persona context (identity, communication style, principles in SKILL.md) informs how each capability is executed. Don't repeat that context in every capability prompt. + +**Agent structure** (only create subfolders that are needed): -4. **Folder structure:** ``` {skill-name}/ -├── SKILL.md # Contains full persona content (agent.md embedded) -├── bmad-manifest.json # Capabilities, persona, memory, module integration -├── init.md # First-run setup (if sidecar) -├── autonomous-wake.md # Autonomous activation (if autonomous mode) -├── save-memory.md # Explicit memory save (if sidecar) -├── {name}.md # Each internal capability prompt -├── references/ # Reference data, schemas, guides (read for context) -│ └── memory-system.md # (if sidecar needed) -├── assets/ # Templates, starter files (copied/transformed into output) -└── scripts/ # Deterministic code — validation, transformation, testing - └── run-tests.sh # uvx-powered test runner (if python tests exist) +├── SKILL.md # Persona, activation, capability routing +├── references/ # Progressive disclosure content +│ ├── {capability}.md # Each internal capability prompt +│ ├── memory-system.md # Memory discipline (if sidecar) +│ ├── init.md # First-run onboarding (if sidecar) +│ ├── autonomous-wake.md # Headless activation (if headless) +│ └── save-memory.md # Explicit memory save (if sidecar) +├── assets/ # Templates, starter files +└── scripts/ # Deterministic code with tests ``` -**What goes where:** -| Location | Contains | LLM relationship | -|----------|----------|-----------------| -| **Root `.md` files** | Prompt/instruction files, subagent definitions | LLM **loads and executes** these as instructions — they are extensions of SKILL.md | -| **`references/`** | Reference data, schemas, tables, examples, guides | LLM **reads for context** — informational, not executable | -| **`assets/`** | Templates, starter files, boilerplate | LLM **copies/transforms** these into output — not for reasoning | -| 
**`scripts/`** | Python, shell scripts with tests | LLM **invokes** these — deterministic operations that don't need judgment | +| Location | Contains | LLM relationship | +| ------------------- | ---------------------------------- | ------------------------------------ | +| **SKILL.md** | Persona, activation, routing | LLM identity and router | +| **`./references/`** | Capability prompts, reference data | Loaded on demand | +| **`./assets/`** | Templates, starter files | Copied/transformed into output | +| **`./scripts/`** | Python, shell scripts with tests | Invoked for deterministic operations | + +**Activation guidance for built agents:** + +Activation is a single flow regardless of mode. It should: + +- Load config and resolve values (with defaults) +- Load sidecar `index.md` if the agent has memory +- If headless, route to `./references/autonomous-wake.md` +- If interactive, greet the user and continue from memory context or offer capabilities -Only create subfolders that are needed — most skills won't need all four. +**If the built agent includes scripts**, also load `./references/script-standards.md` — ensures PEP 723 metadata, correct shebangs, and `uv run` invocation from the start. -5. Output to `bmad_builder_output_folder` from config, or `{project-root}/bmad-builder-creations/` +**Lint gate** — after building, validate and auto-fix: -6. **Lint gate** — run deterministic validation scripts: +If subagents available, delegate lint-fix to a subagent. Otherwise run inline. + +1. Run both lint scripts in parallel: ```bash - python3 scripts/scan-path-standards.py {skill-path} - python3 scripts/scan-scripts.py {skill-path} + python3 ./scripts/scan-path-standards.py {skill-path} + python3 ./scripts/scan-scripts.py {skill-path} ``` - - If any script returns critical issues: fix them before proceeding - - If only warnings/medium: note them but proceed +2. Fix high/critical findings and re-run (up to 3 attempts per script) +3. 
Run unit tests if scripts exist in the built skill ## Phase 6: Summary -Present what was built: location, structure, first-run behavior, capabilities. Ask if adjustments needed. - -**After the build completes, offer quality optimization:** - -Ask: *"Build is done. Would you like to run a Quality Scan to optimize the agent further?"* +Present what was built: location, structure, first-run behavior, capabilities. -If yes, load `quality-optimizer.md` with `{scan_mode}=full` and the agent path. +Run unit tests if scripts exist. Remind user to commit before quality analysis. -Remind them: BMad module system compliant. Use `bmad-init` skill to integrate into a project. +**Offer quality analysis:** Ask if they'd like a Quality Analysis to identify opportunities. If yes, load `quality-analysis.md` with the agent path. diff --git a/plugins/bmad/skills/bmad-agent-builder/quality-analysis.md b/plugins/bmad/skills/bmad-agent-builder/quality-analysis.md new file mode 100644 index 0000000..c9c12c1 --- /dev/null +++ b/plugins/bmad/skills/bmad-agent-builder/quality-analysis.md @@ -0,0 +1,130 @@ +--- +name: quality-analysis +description: Comprehensive quality analysis for BMad agents. Runs deterministic lint scripts and spawns parallel subagents for judgment-based scanning. Produces a synthesized report with agent portrait, capability dashboard, themes, and actionable opportunities. +menu-code: QA +--- + +**Language:** Use `{communication_language}` for all output. + +# BMad Method · Quality Analysis + +You orchestrate quality analysis on a BMad agent. Deterministic checks run as scripts (fast, zero tokens). Judgment-based analysis runs as LLM subagents. A report creator synthesizes everything into a unified, theme-based report with agent portrait and capability dashboard. + +## Your Role + +**DO NOT read the target agent's files yourself.** Scripts and subagents do all analysis. You orchestrate: run scripts, spawn scanners, hand off to the report creator. 
+ +## Headless Mode + +If `{headless_mode}=true`, skip all user interaction, use safe defaults, note warnings, and output structured JSON as specified in Present to User. + +## Pre-Scan Checks + +Check for uncommitted changes. In headless mode, note warnings and proceed. In interactive mode, inform the user and confirm. Also confirm the agent is currently functioning. + +## Analysis Principles + +**Effectiveness over efficiency.** Agent personality is investment, not waste. The report presents opportunities — the user applies judgment. Never suggest flattening an agent's voice unless explicitly asked. + +## Scanners + +### Lint Scripts (Deterministic — Run First) + +| # | Script | Focus | Output File | +| --- | -------------------------------- | --------------------------------------- | -------------------------- | +| S1 | `scripts/scan-path-standards.py` | Path conventions | `path-standards-temp.json` | +| S2 | `scripts/scan-scripts.py` | Script portability, PEP 723, unit tests | `scripts-temp.json` | + +### Pre-Pass Scripts (Feed LLM Scanners) + +| # | Script | Feeds | Output File | +| --- | ------------------------------------------- | ---------------------------- | ------------------------------------- | +| P1 | `scripts/prepass-structure-capabilities.py` | structure scanner | `structure-capabilities-prepass.json` | +| P2 | `scripts/prepass-prompt-metrics.py` | prompt-craft scanner | `prompt-metrics-prepass.json` | +| P3 | `scripts/prepass-execution-deps.py` | execution-efficiency scanner | `execution-deps-prepass.json` | + +### LLM Scanners (Judgment-Based — Run After Scripts) + +Each scanner writes a free-form analysis document: + +| # | Scanner | Focus | Pre-Pass? 
| Output File | +| --- | ------------------------------------------- | ------------------------------------------------------------------------- | --------- | --------------------------------------- | +| L1 | `quality-scan-structure.md` | Structure, capabilities, identity, memory, consistency | Yes | `structure-analysis.md` | +| L2 | `quality-scan-prompt-craft.md` | Token efficiency, outcome balance, persona voice, per-capability craft | Yes | `prompt-craft-analysis.md` | +| L3 | `quality-scan-execution-efficiency.md` | Parallelization, delegation, memory loading, context optimization | Yes | `execution-efficiency-analysis.md` | +| L4 | `quality-scan-agent-cohesion.md` | Persona-capability alignment, identity coherence, per-capability cohesion | No | `agent-cohesion-analysis.md` | +| L5 | `quality-scan-enhancement-opportunities.md` | Edge cases, experience gaps, user journeys, headless potential | No | `enhancement-opportunities-analysis.md` | +| L6 | `quality-scan-script-opportunities.md` | Deterministic operations that should be scripts | No | `script-opportunities-analysis.md` | + +## Execution + +First create the output directory `{quality-report-dir}`: `{bmad_builder_reports}/{skill-name}/quality-analysis/{date-time-stamp}/` + +### Step 1: Run All Scripts (Parallel) + +```bash +python3 scripts/scan-path-standards.py {skill-path} -o {quality-report-dir}/path-standards-temp.json +python3 scripts/scan-scripts.py {skill-path} -o {quality-report-dir}/scripts-temp.json +python3 scripts/prepass-structure-capabilities.py {skill-path} -o {quality-report-dir}/structure-capabilities-prepass.json +python3 scripts/prepass-prompt-metrics.py {skill-path} -o {quality-report-dir}/prompt-metrics-prepass.json +uv run scripts/prepass-execution-deps.py {skill-path} -o {quality-report-dir}/execution-deps-prepass.json +``` + +### Step 2: Spawn LLM Scanners (Parallel) + +After scripts complete, spawn all scanners as parallel subagents. + +**With pre-pass (L1, L2, L3):** provide pre-pass JSON path. 
+**Without pre-pass (L4, L5, L6):** provide skill path and output directory. + +Each subagent loads the scanner file, analyzes the agent, writes analysis to the output directory, returns the filename. + +### Step 3: Synthesize Report + +Spawn a subagent with `report-quality-scan-creator.md`. + +Provide: + +- `{skill-path}` — The agent being analyzed +- `{quality-report-dir}` — Directory with all scanner output + +The report creator reads everything, synthesizes agent portrait + capability dashboard + themes, writes: + +1. `quality-report.md` — Narrative markdown with BMad Method branding +2. `report-data.json` — Structured data for HTML + +### Step 4: Generate HTML Report + +```bash +python3 scripts/generate-html-report.py {quality-report-dir} --open +``` + +## Present to User + +**IF `{headless_mode}=true`:** + +Read `report-data.json` and output: + +```json +{ + "headless_mode": true, + "scan_completed": true, + "report_file": "{path}/quality-report.md", + "html_report": "{path}/quality-report.html", + "data_file": "{path}/report-data.json", + "grade": "Excellent|Good|Fair|Poor", + "opportunities": 0, + "broken": 0 +} +``` + +**IF interactive:** + +Read `report-data.json` and present: + +1. Agent portrait — icon, name, title +2. Grade and narrative +3. Capability dashboard summary +4. Top opportunities +5. Reports — paths and "HTML opened in browser" +6. Offer: apply fixes, use HTML to select items, discuss findings diff --git a/plugins/bmad/skills/bmad-agent-builder/quality-optimizer.md b/plugins/bmad/skills/bmad-agent-builder/quality-optimizer.md deleted file mode 100644 index 2e22591..0000000 --- a/plugins/bmad/skills/bmad-agent-builder/quality-optimizer.md +++ /dev/null @@ -1,208 +0,0 @@ ---- -name: quality-optimizer -description: Comprehensive quality validation for BMad agents. Runs deterministic lint scripts and spawns parallel subagents for judgment-based scanning. Returns consolidated findings as structured JSON. 
-menu-code: QO ---- - -**Language:** Use `{communication_language}` for all output. - -# Quality Optimizer - -You orchestrate quality scans on a BMad agent. Deterministic checks run as scripts (fast, zero tokens). Judgment-based analysis runs as LLM subagents. You synthesize all results into a unified report. - -## Your Role: Coordination, Not File Reading - -**DO NOT read the target agent's files yourself.** Scripts and subagents do all analysis. - -Your job: -1. Create output directory -2. Run all lint scripts + pre-pass scripts (instant, deterministic) -3. Spawn all LLM scanner subagents in parallel (with pre-pass data where available) -4. Collect all results -5. Synthesize into unified report (spawn report creator) -6. Present findings to user - -## Autonomous Mode - -**Check if `{headless_mode}=true`** — If set, run in headless mode: -- **Skip ALL questions** — proceed with safe defaults -- **Uncommitted changes:** Note in report, don't ask -- **Agent functioning:** Assume yes, note in report that user should verify -- **After report:** Output summary and exit, don't offer next steps -- **Output format:** Structured JSON summary + report path, minimal conversational text - -**Autonomous mode output:** -```json -{ - "headless_mode": true, - "report_file": "{path-to-report}", - "summary": { ... }, - "warnings": ["Uncommitted changes detected", "Agent functioning not verified"] -} -``` - -## Pre-Scan Checks - -Before running any scans: - -**IF `{headless_mode}=true`:** -1. **Check for uncommitted changes** — Run `git status`. Note in warnings array if found. -2. **Skip agent functioning verification** — Add to warnings: "Agent functioning not verified — user should confirm agent is working before applying fixes" -3. **Proceed directly to scans** - -**IF `{headless_mode}=false` or not set:** -1. **Check for uncommitted changes** — Run `git status` on the repository. If uncommitted changes: - - Warn: "You have uncommitted changes. 
It's recommended to commit before optimization so you can easily revert if needed." - - Ask: "Do you want to proceed anyway, or commit first?" - - Halt and wait for user response - -2. **Verify agent is functioning** — Ask if the agent is currently working as expected. Optimization should improve, not break working agents. - -## Communicate This Guidance to the User - -**Agent skills are both art and science.** The report will contain many suggestions. Apply these decision rules: - -- **Keep phrasing** that captures the agent's intended voice or personality — leaner isn't always better for persona-driven agents -- **Keep content** that adds clarity for the AI even if a human would find it obvious — the AI needs explicit guidance -- **Prefer scripting** for deterministic operations; **prefer prompting** for creative, contextual, or judgment-based tasks -- **Reject changes** that would flatten the agent's personality unless the user explicitly wants a neutral tone - -## Quality Scanners - -### Lint Scripts (Deterministic — Run First) - -These run instantly, cost zero tokens, and produce structured JSON: - -| # | Script | Focus | Temp Filename | -|---|--------|-------|---------------| -| S1 | `scripts/scan-path-standards.py` | Path conventions: {project-root} only for _bmad, bare _bmad, memory paths, double-prefix, absolute paths | `path-standards-temp.json` | -| S2 | `scripts/scan-scripts.py` | Script portability, PEP 723, agentic design, unit tests | `scripts-temp.json` | - -### Pre-Pass Scripts (Feed LLM Scanners) - -These extract metrics for the LLM scanners so they work from compact data instead of raw files: - -| # | Script | Feeds | Temp Filename | -|---|--------|-------|---------------| -| P1 | `scripts/prepass-structure-capabilities.py` | structure LLM scanner | `structure-capabilities-prepass.json` | -| P2 | `scripts/prepass-prompt-metrics.py` | prompt-craft LLM scanner | `prompt-metrics-prepass.json` | -| P3 | `scripts/prepass-execution-deps.py` | 
execution-efficiency LLM scanner | `execution-deps-prepass.json` | - -### LLM Scanners (Judgment-Based — Run After Scripts) - -| # | Scanner | Focus | Pre-Pass? | Temp Filename | -|---|---------|-------|-----------|---------------| -| L1 | `quality-scan-structure.md` | Structure, capabilities, identity, memory setup, consistency | Yes — receives prepass JSON | `structure-temp.json` | -| L2 | `quality-scan-prompt-craft.md` | Token efficiency, anti-patterns, outcome balance, persona voice, Overview quality | Yes — receives metrics JSON | `prompt-craft-temp.json` | -| L3 | `quality-scan-execution-efficiency.md` | Parallelization, subagent delegation, memory loading, context optimization | Yes — receives dep graph JSON | `execution-efficiency-temp.json` | -| L4 | `quality-scan-agent-cohesion.md` | Persona-capability alignment, gaps, redundancies, coherence | No | `agent-cohesion-temp.json` | -| L5 | `quality-scan-enhancement-opportunities.md` | Script automation, autonomous potential, edge cases, experience gaps, delight | No | `enhancement-opportunities-temp.json` | -| L6 | `quality-scan-script-opportunities.md` | Deterministic operation detection — finds LLM work that should be scripts instead | No | `script-opportunities-temp.json` | - -## Execution Instructions - -First create output directory: `{bmad_builder_reports}/{skill-name}/quality-scan/{date-time-stamp}/` - -### Step 1: Run Lint Scripts + Pre-Pass Scripts (Parallel) - -Run all applicable scripts in parallel. 
They output JSON — capture to temp files in the output directory: - -```bash -# Full scan runs all 2 lint scripts + all 3 pre-pass scripts (5 total, all parallel) -python3 scripts/scan-path-standards.py {skill-path} -o {quality-report-dir}/path-standards-temp.json -python3 scripts/scan-scripts.py {skill-path} -o {quality-report-dir}/scripts-temp.json -python3 scripts/prepass-structure-capabilities.py {skill-path} -o {quality-report-dir}/structure-capabilities-prepass.json -python3 scripts/prepass-prompt-metrics.py {skill-path} -o {quality-report-dir}/prompt-metrics-prepass.json -uv run scripts/prepass-execution-deps.py {skill-path} -o {quality-report-dir}/execution-deps-prepass.json -``` - -### Step 2: Spawn LLM Scanners (Parallel) - -After scripts complete, spawn applicable LLM scanners as parallel subagents. - -**For scanners WITH pre-pass (L1, L2, L3):** provide the pre-pass JSON file path so the scanner reads compact metrics instead of raw files. The subagent should read the pre-pass JSON first, then only read raw files for judgment calls the pre-pass doesn't cover. - -**For scanners WITHOUT pre-pass (L4, L5, L6):** provide just the skill path and output directory. - -Each subagent receives: -- Scanner file to load (e.g., `quality-scan-agent-cohesion.md`) -- Skill path to scan: `{skill-path}` -- Output directory for results: `{quality-report-dir}` -- Temp filename for output: `{temp-filename}` -- Pre-pass file path (if applicable): `{quality-report-dir}/{prepass-filename}` - -The subagent will: -- Load the scanner file and operate as that scanner -- Read pre-pass JSON first if provided, then read raw files only as needed -- Output findings as detailed JSON to: `{quality-report-dir}/{temp-filename}.json` -- Return only the filename when complete - -## Synthesis - -After all scripts and scanners complete: - -**IF only lint scripts ran (no LLM scanners):** -1. Read the script output JSON files -2. 
Present findings directly — these are definitive pass/fail results - -**IF single LLM scanner (with or without scripts):** -1. Read all temp JSON files (script + scanner) -2. Present findings directly in simplified format -3. Skip report creator (not needed for single scanner) - -**IF multiple LLM scanners:** -1. Initiate a subagent with `report-quality-scan-creator.md` - -**Provide the subagent with:** -- `{skill-path}` — The agent being validated -- `{temp-files-dir}` — Directory containing all `*-temp.json` files (both script and LLM results) -- `{quality-report-dir}` — Where to write the final report - -## Generate HTML Report - -After the report creator finishes (or after presenting lint-only / single-scanner results), generate the interactive HTML report: - -```bash -python3 scripts/generate-html-report.py {quality-report-dir} --open -``` - -This produces `{quality-report-dir}/quality-report.html` — a self-contained interactive report with severity filters, collapsible sections, per-item copy-prompt buttons, and a batch prompt generator. The `--open` flag opens it in the default browser. - -## Present Findings to User - -After receiving the JSON summary from the report creator: - -**IF `{headless_mode}=true`:** -1. **Output structured JSON:** -```json -{ - "headless_mode": true, - "scan_completed": true, - "report_file": "{full-path-to-report}", - "html_report": "{full-path-to-html}", - "warnings": ["any warnings from pre-scan checks"], - "summary": { - "total_issues": 0, - "critical": 0, - "high": 0, - "medium": 0, - "low": 0, - "overall_quality": "{Excellent|Good|Fair|Poor}", - "truly_broken_found": false - } -} -``` -2. **Exit** — Don't offer next steps, don't ask questions - -**IF `{headless_mode}=false` or not set:** -1. **High-level summary** with total issues by severity -2. **Highlight truly broken/missing** — CRITICAL and HIGH issues prominently -3. 
**Mention reports** — "Full report: {report_file}" and "Interactive HTML report opened in browser (also at: {html_report})" -4. **Offer next steps:** - - Apply fixes directly - - Use the HTML report to select specific items and generate prompts - - Discuss specific findings - -## Key Principle - -Your role is ORCHESTRATION: run scripts, spawn subagents, synthesize results. Scripts handle deterministic checks (paths, schema, script standards). LLM scanners handle judgment calls (cohesion, craft, efficiency). You coordinate both and present unified findings. diff --git a/plugins/bmad/skills/bmad-agent-builder/quality-scan-agent-cohesion.md b/plugins/bmad/skills/bmad-agent-builder/quality-scan-agent-cohesion.md index 66a8f17..ba5fe8b 100644 --- a/plugins/bmad/skills/bmad-agent-builder/quality-scan-agent-cohesion.md +++ b/plugins/bmad/skills/bmad-agent-builder/quality-scan-agent-cohesion.md @@ -9,6 +9,7 @@ You evaluate the overall cohesion of a BMad agent: does the persona align with c ## Your Role Analyze the agent as a unified whole to identify: + - **Gaps** — Capabilities the agent should likely have but doesn't - **Redundancies** — Overlapping capabilities that could be consolidated - **Misalignments** — Capabilities that don't fit the persona or purpose @@ -20,8 +21,8 @@ This is an **opinionated, advisory scan**. Findings are suggestions, not errors. ## Scan Targets Find and read: + - `SKILL.md` — Identity, persona, principles, description -- `bmad-manifest.json` — All capabilities with menu codes and descriptions - `*.md` (prompt files at root) — What each prompt actually does - `references/dimension-definitions.md` — If exists, context for capability design - Look for references to external skills in prompts and SKILL.md @@ -32,14 +33,15 @@ Find and read: **Question:** Does WHO the agent is match WHAT it can do? 
-| Check | Why It Matters | -|-------|----------------| -| Agent's stated expertise matches its capabilities | An "expert in X" should be able to do core X tasks | -| Communication style fits the persona's role | A "senior engineer" sounds different than a "friendly assistant" | -| Principles are reflected in actual capabilities | Don't claim "user autonomy" if you never ask preferences | -| Description matches what capabilities actually deliver | Misalignment causes user disappointment | +| Check | Why It Matters | +| ------------------------------------------------------ | ---------------------------------------------------------------- | +| Agent's stated expertise matches its capabilities | An "expert in X" should be able to do core X tasks | +| Communication style fits the persona's role | A "senior engineer" sounds different than a "friendly assistant" | +| Principles are reflected in actual capabilities | Don't claim "user autonomy" if you never ask preferences | +| Description matches what capabilities actually deliver | Misalignment causes user disappointment | **Examples of misalignment:** + - Agent claims "expert code reviewer" but has no linting/format analysis - Persona is "friendly mentor" but all prompts are terse and mechanical - Description says "end-to-end project management" but only has task-listing capabilities @@ -48,14 +50,15 @@ Find and read: **Question:** Given the persona and purpose, what's OBVIOUSLY missing? 
-| Check | Why It Matters | -|-------|----------------| -| Core workflow is fully supported | Users shouldn't need to switch agents mid-task | -| Basic CRUD operations exist if relevant | Can't have "data manager" that only reads | -| Setup/teardown capabilities present | Start and end states matter | -| Output/export capabilities exist | Data trapped in agent is useless | +| Check | Why It Matters | +| --------------------------------------- | ---------------------------------------------- | +| Core workflow is fully supported | Users shouldn't need to switch agents mid-task | +| Basic CRUD operations exist if relevant | Can't have "data manager" that only reads | +| Setup/teardown capabilities present | Start and end states matter | +| Output/export capabilities exist | Data trapped in agent is useless | **Gap detection heuristic:** + - If agent does X, does it also handle related X' and X''? - If agent manages a lifecycle, does it cover all stages? - If agent analyzes something, can it also fix/report on it? @@ -65,13 +68,14 @@ Find and read: **Question:** Are multiple capabilities doing the same thing? -| Check | Why It Matters | -|-------|----------------| -| No overlapping capabilities in manifest | Confuses users, wastes tokens | -- Prompts don't duplicate functionality | Pick ONE place for each behavior | -| Similar capabilities aren't separated | Could be consolidated into stronger single capability | +| Check | Why It Matters | +| --------------------------------------- | ----------------------------------------------------- | +| No overlapping capabilities | Confuses users, wastes tokens | +| Prompts don't duplicate functionality | Pick ONE place for each behavior | +| Similar capabilities aren't separated | Could be consolidated into stronger single capability | **Redundancy patterns:** + - "Format code" and "lint code" and "fix code style" — maybe one capability? - "Summarize document" and "extract key points" and "get main ideas" — overlapping? 
- Multiple prompts that read files with slight variations — could parameterize @@ -80,11 +84,11 @@ Find and read: **Question:** How does this agent work with others, and is that intentional? -| Check | Why It Matters | -|-------|----------------| -| Referenced external skills fit the workflow | Random skill calls confuse the purpose | +| Check | Why It Matters | +| -------------------------------------------- | ------------------------------------------- | +| Referenced external skills fit the workflow | Random skill calls confuse the purpose | | Agent can function standalone OR with skills | Don't REQUIRE skills that aren't documented | -| Skill delegation follows a clear pattern | Haphazard calling suggests poor design | +| Skill delegation follows a clear pattern | Haphazard calling suggests poor design | **Note:** If external skills aren't available, infer their purpose from name and usage context. @@ -92,13 +96,14 @@ Find and read: **Question:** Are capabilities at the right level of abstraction? 
-| Check | Why It Matters | -|-------|----------------| -| Capabilities aren't too granular | 5 similar micro-capabilities should be one | -| Capabilities aren't too broad | "Do everything related to code" isn't a capability | -| Each capability has clear, unique purpose | Users should understand what each does | +| Check | Why It Matters | +| ----------------------------------------- | -------------------------------------------------- | +| Capabilities aren't too granular | 5 similar micro-capabilities should be one | +| Capabilities aren't too broad | "Do everything related to code" isn't a capability | +| Each capability has clear, unique purpose | Users should understand what each does | **Goldilocks test:** + - Too small: "Open file", "Read file", "Parse file" → Should be "Analyze file" - Too large: "Handle all git operations" → Split into clone/commit/branch/PR - Just right: "Create pull request with review template" @@ -107,166 +112,26 @@ Find and read: **Question:** Can a user accomplish meaningful work end-to-end? -| Check | Why It Matters | -|-------|----------------| -| Common workflows are fully supported | Gaps force context switching | -| Capabilities can be chained logically | No dead-end operations | -| Entry points are clear | User knows where to start | -| Exit points provide value | User gets something useful, not just internal state | - -## Analysis Process - -1. **Build mental model** of the agent: - - Who is this agent? (persona, role, expertise) - - What is it FOR? (purpose, outcomes) - - What can it ACTUALLY do? (enumerate all capabilities) - -2. **Evaluate alignment**: - - Does the persona justify the capabilities? - - Are there capabilities that don't fit? - - Is the persona underserving the capabilities? (too modest) - -3. **Gap analysis**: - - For each core purpose, ask "can this agent actually do that?" - - For each key workflow, check if all steps are covered - - Consider adjacent capabilities that should exist - -4. 
**Redundancy check**: - - Group similar capabilities - - Identify overlaps - - Note consolidation opportunities - -5. **Creative synthesis**: - - What would make this agent MORE useful? - - What's the ONE thing missing that would have biggest impact? - - What's the ONE thing to remove that would clarify focus? - -## Output Format - -Output your findings using the universal schema defined in `references/universal-scan-schema.md`. - -Use EXACTLY these field names: `file`, `line`, `severity`, `category`, `title`, `detail`, `action`. Do not rename, restructure, or add fields to findings. - -Before writing output, verify: Is your array called `findings`? Does every item have `title`, `detail`, `action`? Is `assessments` an object, not items in the findings array? - -You will receive `{skill-path}` and `{quality-report-dir}` as inputs. - -Write JSON findings to: `{quality-report-dir}/agent-cohesion-temp.json` - -```json -{ - "scanner": "agent-cohesion", - "agent_path": "{path}", - "findings": [ - { - "file": "SKILL.md|bmad-manifest.json|{name}.md", - "severity": "high|medium|low|suggestion|strength", - "category": "gap|redundancy|misalignment|opportunity|strength", - "title": "Brief description", - "detail": "What you noticed, why this matters for cohesion, and what value addressing it would add", - "action": "Specific improvement idea" - } - ], - "assessments": { - "agent_identity": { - "name": "{skill-name}", - "persona_summary": "Brief characterization of who this agent is", - "primary_purpose": "What this agent is for", - "capability_count": 12 - }, - "cohesion_analysis": { - "persona_alignment": { - "score": "strong|moderate|weak", - "notes": "Brief explanation of why persona fits or doesn't fit capabilities" - }, - "capability_completeness": { - "score": "complete|mostly-complete|gaps-obvious", - "missing_areas": ["area1", "area2"], - "notes": "What's missing that should probably be there" - }, - "redundancy_level": { - "score": 
"clean|some-overlap|significant-redundancy", - "consolidation_opportunities": [ - { - "capabilities": ["cap-a", "cap-b", "cap-c"], - "suggested_consolidation": "How these could be combined" - } - ] - }, - "external_integration": { - "external_skills_referenced": 3, - "integration_pattern": "intentional|incidental|unclear", - "notes": "How external skills fit into the overall design" - }, - "user_journey_score": { - "score": "complete-end-to-end|mostly-complete|fragmented", - "broken_workflows": ["workflow that can't be completed"], - "notes": "Can a user accomplish real work with this agent?" - } - } - }, - "summary": { - "total_findings": 0, - "by_severity": {"high": 0, "medium": 0, "low": 0, "suggestion": 0, "strength": 0}, - "by_category": {"gap": 0, "redundancy": 0, "misalignment": 0, "opportunity": 0, "strength": 0}, - "overall_cohesion": "cohesive|mostly-cohesive|fragmented|confused", - "single_most_important_fix": "The ONE thing that would most improve this agent" - } -} -``` - -Merge all findings into the single `findings[]` array: -- Former `findings[]` items: map `issue` to `title`, merge `observation`+`rationale`+`impact` into `detail`, map `suggestion` to `action` -- Former `strengths[]` items: use `severity: "strength"`, `category: "strength"` -- Former `creative_suggestions[]` items: use `severity: "suggestion"`, map `idea` to `title`, `rationale` to `detail`, merge `type` and `estimated_impact` context into `detail`, map actionable recommendation to `action` - -## Severity Guidelines - -| Severity | When to Use | -|----------|-------------| -| **high** | Glaring omission that would obviously confuse users OR capability that completely contradicts persona | -| **medium** | Clear gap in core workflow OR significant redundancy OR moderate misalignment | -| **low** | Minor enhancement opportunity OR edge case not covered | -| **suggestion** | Creative idea, nice-to-have, speculative improvement | - -## Process - -1. 
Read SKILL.md to understand persona and intent -2. Read bmad-manifest.json to enumerate all capabilities -3. Read all prompts to understand what each actually does -4. Read dimension-definitions.md if available for context -5. Build mental model of the agent as a whole -6. Evaluate cohesion across all 6 dimensions -7. Generate findings with specific, actionable suggestions -8. Identify strengths (positive feedback is valuable!) -9. Write JSON to `{quality-report-dir}/agent-cohesion-temp.json` -10. Return only the filename: `agent-cohesion-temp.json` - -## Critical After Draft Output - -**Before finalizing, think one level deeper and verify completeness and quality:** - -### Scan Completeness -- Did I read SKILL.md, bmad-manifest.json, and ALL prompts? -- Did I build a complete mental model of the agent? -- Did I evaluate ALL 6 cohesion dimensions (persona, completeness, redundancy, external, granularity, journey)? -- Did I read dimension-definitions.md if it exists? - -### Finding Quality -- Are "gap" findings truly missing or intentionally out of scope? -- Are "redundancy" findings actual overlap or complementary capabilities? -- Are "misalignment" findings real contradictions or just different aspects? -- Are severity ratings appropriate (high only for glaring omissions)? -- Did I include strengths (positive feedback is valuable)? - -### Cohesion Review -- Does single_most_important_fix represent the highest-impact improvement? -- Do findings tell a coherent story about this agent's cohesion? -- Would addressing high-severity issues significantly improve the agent? -- Are creative_suggestions actually valuable, not just nice-to-haves? - -Only after this verification, write final JSON and return filename. - -## Key Principle - -You are NOT checking for syntax errors or missing fields. You are evaluating whether this agent makes sense as a coherent tool. Think like a product designer reviewing a feature set: Is this useful? Is it complete? Does it fit together? 
Be opinionated but fair—call out what works well, not just what needs improvement. +| Check | Why It Matters | +| ------------------------------------- | --------------------------------------------------- | +| Common workflows are fully supported | Gaps force context switching | +| Capabilities can be chained logically | No dead-end operations | +| Entry points are clear | User knows where to start | +| Exit points provide value | User gets something useful, not just internal state | + +## Output + +Write your analysis as a natural document. This is an opinionated, advisory assessment. Include: + +- **Assessment** — overall cohesion verdict in 2-3 sentences. Does this agent feel authentic and purposeful? +- **Cohesion dimensions** — for each dimension analyzed (persona-capability alignment, identity consistency, capability completeness, etc.), give a score (strong/moderate/weak) and brief explanation +- **Per-capability cohesion** — for each capability, does it fit the agent's identity and expertise? Would this agent naturally have this capability? Flag misalignments. +- **Key findings** — gaps, redundancies, misalignments. Each with severity (high/medium/low/suggestion), affected area, what's off, and how to improve. High = glaring persona contradiction or missing core capability. Medium = clear gap. Low = minor. Suggestion = creative idea. +- **Strengths** — what works well about this agent's coherence +- **Creative suggestions** — ideas that could make the agent more compelling + +Be opinionated but fair. The report creator will synthesize your analysis with other scanners' output. + +Write your analysis to: `{quality-report-dir}/agent-cohesion-analysis.md` + +Return only the filename when complete. 
diff --git a/plugins/bmad/skills/bmad-agent-builder/quality-scan-enhancement-opportunities.md b/plugins/bmad/skills/bmad-agent-builder/quality-scan-enhancement-opportunities.md index df2b565..c4d49fd 100644 --- a/plugins/bmad/skills/bmad-agent-builder/quality-scan-enhancement-opportunities.md +++ b/plugins/bmad/skills/bmad-agent-builder/quality-scan-enhancement-opportunities.md @@ -6,7 +6,7 @@ You are **DreamBot**, a creative disruptor who pressure-tests agents by imaginin Other scanners check if an agent is built correctly, crafted well, runs efficiently, and holds together. You ask the question none of them do: **"What's missing that nobody thought of?"** -You read an agent and genuinely *inhabit* it — its persona, its identity, its capabilities — imagine yourself as six different users with six different contexts, skill levels, moods, and intentions. Then you find the moments where the agent would confuse, frustrate, dead-end, or underwhelm them. You also find the moments where a single creative addition would transform the experience from functional to delightful. +You read an agent and genuinely _inhabit_ it — its persona, its identity, its capabilities — imagine yourself as six different users with six different contexts, skill levels, moods, and intentions. Then you find the moments where the agent would confuse, frustrate, dead-end, or underwhelm them. You also find the moments where a single creative addition would transform the experience from functional to delightful. This is the BMad dreamer scanner. Your job is to push boundaries, challenge assumptions, and surface the ideas that make builders say "I never thought of that." Then temper each wild idea into a concrete, succinct suggestion the builder can actually act on. 
@@ -26,10 +26,10 @@ You are NOT checking structure, craft quality, performance, or test coverage — ## Scan Targets Find and read: + - `SKILL.md` — Understand the agent's purpose, persona, audience, and flow - `*.md` (prompt files at root) — Walk through each capability as a user would experience it - `references/*.md` — Understand what supporting material exists -- `references/*.json` — See what supporting schemas exist ## Creative Analysis Lenses @@ -38,6 +38,7 @@ Find and read: Imagine real users in real situations. What breaks, confuses, or dead-ends? **User archetypes to inhabit:** + - The **first-timer** who has never used this kind of tool before - The **expert** who knows exactly what they want and finds the agent too slow - The **confused user** who invoked this agent by accident or with the wrong intent @@ -46,6 +47,7 @@ Imagine real users in real situations. What breaks, confuses, or dead-ends? - The **automator** — a cron job, CI pipeline, or another agent that wants to invoke this agent headless with pre-supplied inputs and get back a result **Questions to ask at each capability:** + - What if the user provides partial, ambiguous, or contradictory input? - What if the user wants to skip this capability or jump to a different one? - What if the user's real need doesn't fit the agent's assumed categories? @@ -55,75 +57,76 @@ Imagine real users in real situations. What breaks, confuses, or dead-ends? ### 2. Experience Gaps -Where does the agent deliver output but miss the *experience*? +Where does the agent deliver output but miss the _experience_? 
-| Gap Type | What to Look For | -|----------|-----------------| -| **Dead-end moments** | User hits a state where the agent has nothing to offer and no guidance on what to do next | -| **Assumption walls** | Agent assumes knowledge, context, or setup the user might not have | -| **Missing recovery** | Error or unexpected input with no graceful path forward | -| **Abandonment friction** | User wants to stop mid-conversation but there's no clean exit or state preservation | -| **Success amnesia** | Agent completes but doesn't help the user understand or use what was produced | -| **Invisible value** | Agent does something valuable but doesn't surface it to the user | +| Gap Type | What to Look For | +| ------------------------ | ----------------------------------------------------------------------------------------- | +| **Dead-end moments** | User hits a state where the agent has nothing to offer and no guidance on what to do next | +| **Assumption walls** | Agent assumes knowledge, context, or setup the user might not have | +| **Missing recovery** | Error or unexpected input with no graceful path forward | +| **Abandonment friction** | User wants to stop mid-conversation but there's no clean exit or state preservation | +| **Success amnesia** | Agent completes but doesn't help the user understand or use what was produced | +| **Invisible value** | Agent does something valuable but doesn't surface it to the user | ### 3. Delight Opportunities Where could a small addition create outsized positive impact? -| Opportunity Type | Example | -|-----------------|---------| -| **Quick-win mode** | "I already have a spec, skip the interview" — let experienced users fast-track | -| **Smart defaults** | Infer reasonable defaults from context instead of asking every question | -| **Proactive insight** | "Based on what you've described, you might also want to consider..." 
| -| **Progress awareness** | Help the user understand where they are in a multi-capability workflow | -| **Memory leverage** | Use prior conversation context or project knowledge to personalize | -| **Graceful degradation** | When something goes wrong, offer a useful alternative instead of just failing | -| **Unexpected connection** | "This pairs well with [other skill]" — suggest adjacent capabilities | +| Opportunity Type | Example | +| ------------------------- | ------------------------------------------------------------------------------ | +| **Quick-win mode** | "I already have a spec, skip the interview" — let experienced users fast-track | +| **Smart defaults** | Infer reasonable defaults from context instead of asking every question | +| **Proactive insight** | "Based on what you've described, you might also want to consider..." | +| **Progress awareness** | Help the user understand where they are in a multi-capability workflow | +| **Memory leverage** | Use prior conversation context or project knowledge to personalize | +| **Graceful degradation** | When something goes wrong, offer a useful alternative instead of just failing | +| **Unexpected connection** | "This pairs well with [other skill]" — suggest adjacent capabilities | ### 4. Assumption Audit Every agent makes assumptions. Surface the ones that are most likely to be wrong. -| Assumption Category | What to Challenge | -|--------------------|------------------| -| **User intent** | Does the agent assume a single use case when users might have several? | -| **Input quality** | Does the agent assume well-formed, complete input? | -| **Linear progression** | Does the agent assume users move forward-only through capabilities? | -| **Context availability** | Does the agent assume information that might not be in the conversation? | -| **Single-session completion** | Does the agent assume the interaction completes in one session? 
| -| **Agent isolation** | Does the agent assume it's the only thing the user is doing? | +| Assumption Category | What to Challenge | +| ----------------------------- | ------------------------------------------------------------------------ | +| **User intent** | Does the agent assume a single use case when users might have several? | +| **Input quality** | Does the agent assume well-formed, complete input? | +| **Linear progression** | Does the agent assume users move forward-only through capabilities? | +| **Context availability** | Does the agent assume information that might not be in the conversation? | +| **Single-session completion** | Does the agent assume the interaction completes in one session? | +| **Agent isolation** | Does the agent assume it's the only thing the user is doing? | -### 5. Autonomous Potential +### 5. Headless Potential Many agents are built for human-in-the-loop interaction — conversational discovery, iterative refinement, user confirmation at each step. But what if someone passed in a headless flag and a detailed prompt? Could this agent just... do its job, create the artifact, and return the file path? -This is one of the most transformative "what ifs" you can ask about a HITL agent. An agent that works both interactively AND autonomously is dramatically more valuable — it can be invoked by other skills, chained in pipelines, run on schedules, or used by power users who already know what they want. +This is one of the most transformative "what ifs" you can ask about a HITL agent. An agent that works both interactively AND headlessly is dramatically more valuable — it can be invoked by other skills, chained in pipelines, run on schedules, or used by power users who already know what they want. **For each HITL interaction point, ask:** -| Question | What You're Looking For | -|----------|------------------------| -| Could this question be answered by input parameters? | "What type of project?" 
→ could come from a prompt or config instead of asking | -| Could this confirmation be skipped with reasonable defaults? | "Does this look right?" → if the input was detailed enough, skip confirmation | -| Is this clarification always needed, or only for ambiguous input? | "Did you mean X or Y?" → only needed when input is vague | -| Does this interaction add value or just ceremony? | Some confirmations exist because the builder assumed interactivity, not because they're necessary | +| Question | What You're Looking For | +| ----------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | +| Could this question be answered by input parameters? | "What type of project?" → could come from a prompt or config instead of asking | +| Could this confirmation be skipped with reasonable defaults? | "Does this look right?" → if the input was detailed enough, skip confirmation | +| Is this clarification always needed, or only for ambiguous input? | "Did you mean X or Y?" → only needed when input is vague | +| Does this interaction add value or just ceremony? 
| Some confirmations exist because the builder assumed interactivity, not because they're necessary | -**Assess the agent's autonomous potential:** +**Assess the agent's headless potential:** -| Level | What It Means | -|-------|--------------| -| **Headless-ready** | Could work autonomously today with minimal changes — just needs a flag to skip confirmations | -| **Easily adaptable** | Most interaction points could accept pre-supplied parameters; needs a headless path added to 2-3 capabilities | -| **Partially adaptable** | Core artifact creation could be autonomous, but discovery/interview capabilities are fundamentally interactive — suggest a "skip to build" entry point | -| **Fundamentally interactive** | The value IS the conversation (coaching, brainstorming, exploration) — autonomous mode wouldn't make sense, and that's OK | +| Level | What It Means | +| ----------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Headless-ready** | Could work headlessly today with minimal changes — just needs a flag to skip confirmations | +| **Easily adaptable** | Most interaction points could accept pre-supplied parameters; needs a headless path added to 2-3 capabilities | +| **Partially adaptable** | Core artifact creation could be headless, but discovery/interview capabilities are fundamentally interactive — suggest a "skip to build" entry point | +| **Fundamentally interactive** | The value IS the conversation (coaching, brainstorming, exploration) — headless mode wouldn't make sense, and that's OK | **When the agent IS adaptable, suggest the output contract:** + - What would a headless invocation return? (file path, JSON summary, status code) - What inputs would it need upfront? (parameters that currently come from conversation) - Where would the `{headless_mode}` flag need to be checked? 
- Which capabilities could auto-resolve vs which need explicit input even in headless mode? -**Don't force it.** Some agents are fundamentally conversational — their value is the interactive exploration. Flag those as "fundamentally interactive" and move on. The insight is knowing which agents *could* transform, not pretending all of them should. +**Don't force it.** Some agents are fundamentally conversational — their value is the interactive exploration. Flag those as "fundamentally interactive" and move on. The insight is knowing which agents _could_ transform, not pretending all should. ### 6. Facilitative Workflow Patterns @@ -131,15 +134,15 @@ If the agent involves collaborative discovery, artifact creation through user in **Check for these patterns:** -| Pattern | What to Look For | If Missing | -|---------|-----------------|------------| -| **Soft Gate Elicitation** | Does the agent use "anything else or shall we move on?" at natural transitions? | Suggest replacing hard menus with soft gates — they draw out information users didn't know they had | -| **Intent-Before-Ingestion** | Does the agent understand WHY the user is here before scanning artifacts/context? | Suggest reordering: greet → understand intent → THEN scan. Scanning without purpose is noise | -| **Capture-Don't-Interrupt** | When users provide out-of-scope info during discovery, does the agent capture it silently or redirect/stop them? | Suggest a capture-and-defer mechanism — users in creative flow share their best insights unprompted | -| **Dual-Output** | Does the agent produce only a human artifact, or also offer an LLM-optimized distillate for downstream consumption? | If the artifact feeds into other LLM workflows, suggest offering a token-efficient distillate alongside the primary output | -| **Parallel Review Lenses** | Before finalizing, does the agent get multiple perspectives on the artifact? 
| Suggest fanning out 2-3 review subagents (skeptic, opportunity spotter, contextually-chosen third lens) before final output | -| **Three-Mode Architecture** | Does the agent only support one interaction style? | If it produces an artifact, consider whether Guided/Yolo/Autonomous modes would serve different user contexts | -| **Graceful Degradation** | If the agent uses subagents, does it have fallback paths when they're unavailable? | Every subagent-dependent feature should degrade to sequential processing, never block the workflow | +| Pattern | What to Look For | If Missing | +| --------------------------- | ------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------- | +| **Soft Gate Elicitation** | Does the agent use "anything else or shall we move on?" at natural transitions? | Suggest replacing hard menus with soft gates — they draw out information users didn't know they had | +| **Intent-Before-Ingestion** | Does the agent understand WHY the user is here before scanning artifacts/context? | Suggest reordering: greet → understand intent → THEN scan. Scanning without purpose is noise | +| **Capture-Don't-Interrupt** | When users provide out-of-scope info during discovery, does the agent capture it silently or redirect/stop them? | Suggest a capture-and-defer mechanism — users in creative flow share their best insights unprompted | +| **Dual-Output** | Does the agent produce only a human artifact, or also offer an LLM-optimized distillate for downstream consumption? | If the artifact feeds into other LLM workflows, suggest offering a token-efficient distillate alongside the primary output | +| **Parallel Review Lenses** | Before finalizing, does the agent get multiple perspectives on the artifact? 
| Suggest fanning out 2-3 review subagents (skeptic, opportunity spotter, contextually-chosen third lens) before final output | +| **Three-Mode Architecture** | Does the agent only support one interaction style? | If it produces an artifact, consider whether Guided/Yolo/Autonomous modes would serve different user contexts | +| **Graceful Degradation** | If the agent uses subagents, does it have fallback paths when they're unavailable? | Every subagent-dependent feature should degrade to sequential processing, never block the workflow | **How to assess:** These patterns aren't mandatory for every agent — a simple utility doesn't need three-mode architecture. But any agent that involves collaborative discovery, user interviews, or artifact creation through guided interaction should be checked against all seven. Flag missing patterns as `medium-opportunity` or `high-opportunity` depending on how transformative they'd be for the specific agent. @@ -148,6 +151,7 @@ If the agent involves collaborative discovery, artifact creation through user in Mentally walk through the agent end-to-end as each user archetype. Document the moments where the journey breaks, stalls, or disappoints. For each journey, note: + - **Entry friction** — How easy is it to get started? What if the user's first message doesn't perfectly match the expected trigger? - **Mid-flow resilience** — What happens if the user goes off-script, asks a tangential question, or provides unexpected input? - **Exit satisfaction** — Does the user leave with a clear outcome, or does the conversation just... stop? @@ -155,123 +159,21 @@ For each journey, note: ## How to Think -1. **Go wild first.** Read the agent and let your imagination run. Think of the weirdest user, the worst timing, the most unexpected input. No idea is too crazy in this phase. - -2. **Then temper.** For each wild idea, ask: "Is there a practical version of this that would actually improve the agent?" 
If yes, distill it to a sharp, specific suggestion. If the idea is genuinely impractical, drop it — don't pad findings with fantasies. - -3. **Prioritize by user impact.** A suggestion that prevents user confusion outranks a suggestion that adds a nice-to-have feature. A suggestion that transforms the experience outranks one that incrementally improves it. - -4. **Stay in your lane.** Don't flag structural issues (structure scanner handles that), craft quality (prompt-craft handles that), performance (execution-efficiency handles that), or architectural coherence (agent-cohesion handles that). Your findings should be things *only a creative thinker would notice*. - -## Output Format - -Output your findings using the universal schema defined in `references/universal-scan-schema.md`. - -Use EXACTLY these field names: `file`, `line`, `severity`, `category`, `title`, `detail`, `action`. Do not rename, restructure, or add fields to findings. - -Before writing output, verify: Is your array called `findings`? Does every item have `title`, `detail`, `action`? Is `assessments` an object, not items in the findings array? - -You will receive `{skill-path}` and `{quality-report-dir}` as inputs. 
- -Write JSON findings to: `{quality-report-dir}/enhancement-opportunities-temp.json` - -```json -{ - "scanner": "enhancement-opportunities", - "skill_path": "{path}", - "findings": [ - { - "file": "SKILL.md|{name}.md", - "severity": "high-opportunity|medium-opportunity|low-opportunity", - "category": "edge-case|experience-gap|delight-opportunity|assumption-risk|journey-friction|autonomous-potential|facilitative-pattern", - "title": "The specific situation or user story that reveals this opportunity", - "detail": "What you noticed, why it matters, and how this would change the user's experience", - "action": "Concrete, actionable improvement — the tempered version of the wild idea" - } - ], - "assessments": { - "skill_understanding": { - "purpose": "What this agent is trying to do", - "primary_user": "Who this agent is for", - "key_assumptions": ["assumption 1", "assumption 2"] - }, - "user_journeys": [ - { - "archetype": "first-timer|expert|confused|edge-case|hostile-environment|automator", - "summary": "Brief narrative of this user's experience with the agent", - "friction_points": ["moment 1", "moment 2"], - "bright_spots": ["what works well for this user"] - } - ], - "autonomous_assessment": { - "potential": "headless-ready|easily-adaptable|partially-adaptable|fundamentally-interactive", - "hitl_points": 0, - "auto_resolvable": 0, - "needs_input": 0, - "suggested_output_contract": "What a headless invocation would return", - "required_inputs": ["parameters needed upfront for headless mode"], - "notes": "Brief assessment of autonomous viability" - }, - "top_insights": [ - { - "title": "The single most impactful creative observation", - "detail": "The user experience impact", - "action": "What to do about it" - } - ] - }, - "summary": { - "total_findings": 0, - "by_severity": {"high-opportunity": 0, "medium-opportunity": 0, "low-opportunity": 0}, - "by_category": { - "edge_case": 0, - "experience_gap": 0, - "delight_opportunity": 0, - "assumption_risk": 0, - 
"journey_friction": 0, - "autonomous_potential": 0, - "facilitative_pattern": 0 - }, - "assessment": "Brief creative assessment of the agent's user experience, including the boldest practical idea" - } -} -``` - -## Process - -1. Read SKILL.md — deeply understand purpose, persona, audience, and intent -2. Read all prompts — walk through each capability mentally as a user -3. Read resources — understand what's been considered -4. Inhabit each user archetype (including the automator) and mentally simulate their journey through the agent -5. Surface edge cases, experience gaps, delight opportunities, risky assumptions, and autonomous potential -6. For autonomous potential: map every HITL interaction point and assess which could auto-resolve -7. For facilitative/interactive agents: check against all seven facilitative workflow patterns -8. Go wild with ideas, then temper each to a concrete suggestion -9. Prioritize by user impact -10. Write JSON to `{quality-report-dir}/enhancement-opportunities-temp.json` -11. Return only the filename: `enhancement-opportunities-temp.json` - -## Critical After Draft Output - -**Before finalizing, challenge your own findings:** - -### Creative Quality Check -- Did I actually *inhabit* different user archetypes (including the automator), or did I just analyze from the builder's perspective? -- Are my edge cases *realistic* — things that would actually happen — or contrived? -- Are my delight opportunities genuinely delightful, or are they feature bloat? -- Did I find at least one thing that would make the builder say "I never thought of that"? -- Did I honestly assess autonomous potential — not forcing headless on fundamentally interactive agents, but not missing easy wins either? -- For adaptable agents, is my suggested output contract concrete enough to implement? - -### Temper Check -- Is every suggestion *actionable* — could someone implement it from my description? 
-- Did I drop the impractical wild ideas instead of padding my findings? -- Am I staying in my lane — not flagging structure, craft, performance, or architecture issues? -- Would implementing my top suggestions genuinely improve the user experience? - -### Honesty Check -- Did I note what the agent already does well? (Bright spots in user journeys) -- Are my severity ratings honest — high-opportunity only for genuinely transformative ideas? -- Is my `boldest_idea` actually bold, or is it safe and obvious? - -Only after this verification, write final JSON and return filename. +Explore creatively, then distill each idea into a concrete, actionable suggestion. Prioritize by user impact. Stay in your lane. + +## Output + +Write your analysis as a natural document. Include: + +- **Agent understanding** — purpose, primary user, key assumptions (2-3 sentences) +- **User journeys** — for each archetype (first-timer, expert, confused, edge-case, hostile-environment, automator): brief narrative, friction points, bright spots +- **Headless assessment** — potential level, which interactions could auto-resolve, what headless invocation would need +- **Key findings** — edge cases, experience gaps, delight opportunities. Each with severity (high-opportunity/medium-opportunity/low-opportunity), affected area, what you noticed, and concrete suggestion +- **Top insights** — 2-3 most impactful creative observations +- **Facilitative patterns check** — which patterns are present/missing and which would add most value + +Go wild first, then temper. Prioritize by user impact. The report creator will synthesize your analysis with other scanners' output. + +Write your analysis to: `{quality-report-dir}/enhancement-opportunities-analysis.md` + +Return only the filename when complete. 
diff --git a/plugins/bmad/skills/bmad-agent-builder/quality-scan-execution-efficiency.md b/plugins/bmad/skills/bmad-agent-builder/quality-scan-execution-efficiency.md index a5b2201..a7fe20b 100644 --- a/plugins/bmad/skills/bmad-agent-builder/quality-scan-execution-efficiency.md +++ b/plugins/bmad/skills/bmad-agent-builder/quality-scan-execution-efficiency.md @@ -6,7 +6,7 @@ You are **ExecutionEfficiencyBot**, a performance-focused quality engineer who v You validate execution efficiency across the entire agent: parallelization, subagent delegation, context management, memory loading strategy, and multi-source analysis patterns. **Why this matters:** Sequential independent operations waste time. Parent reading before delegating bloats context. Loading all memory when only a slice is needed wastes tokens. Efficient execution means faster, cheaper, more reliable agent operation. -This is a unified scan covering both *how work is distributed* (subagent delegation, context optimization) and *how work is ordered* (sequencing, parallelization). These concerns are deeply intertwined. +This is a unified scan covering both _how work is distributed_ (subagent delegation, context optimization) and _how work is ordered_ (sequencing, parallelization). These concerns are deeply intertwined. ## Your Role @@ -17,6 +17,7 @@ Read the pre-pass JSON first at `{quality-report-dir}/execution-deps-prepass.jso Pre-pass provides: dependency graph, sequential patterns, loop patterns, subagent-chain violations, memory loading patterns. 
Read raw files for judgment calls: + - `SKILL.md` — On Activation patterns, operation flow - `*.md` (prompt files at root) — Each prompt for execution patterns - `references/*.md` — Resource loading patterns @@ -26,16 +27,18 @@ Read raw files for judgment calls: ## Part 1: Parallelization & Batching ### Sequential Operations That Should Be Parallel -| Check | Why It Matters | -|-------|----------------| + +| Check | Why It Matters | +| ----------------------------------------------- | ------------------------------------ | | Independent data-gathering steps are sequential | Wastes time — should run in parallel | -| Multiple files processed sequentially in loop | Should use parallel subagents | -| Multiple tools called in sequence independently | Should batch in one message | +| Multiple files processed sequentially in loop | Should use parallel subagents | +| Multiple tools called in sequence independently | Should batch in one message | ### Tool Call Batching -| Check | Why It Matters | -|-------|----------------| -| Independent tool calls batched in one message | Reduces latency | + +| Check | Why It Matters | +| -------------------------------------------------------- | ---------------------------------- | +| Independent tool calls batched in one message | Reduces latency | | No sequential Read/Grep/Glob calls for different targets | Single message with multiple calls | --- @@ -43,30 +46,34 @@ Read raw files for judgment calls: ## Part 2: Subagent Delegation & Context Management ### Read Avoidance (Critical Pattern) + Don't read files in parent when you could delegate the reading. 
-| Check | Why It Matters | -|-------|----------------| -| Parent doesn't read sources before delegating analysis | Context stays lean | -| Parent delegates READING, not just analysis | Subagents do heavy lifting | -| No "read all, then analyze" patterns | Context explosion avoided | +| Check | Why It Matters | +| ------------------------------------------------------ | -------------------------- | +| Parent doesn't read sources before delegating analysis | Context stays lean | +| Parent delegates READING, not just analysis | Subagents do heavy lifting | +| No "read all, then analyze" patterns | Context explosion avoided | ### Subagent Instruction Quality -| Check | Why It Matters | -|-------|----------------| -| Subagent prompt specifies exact return format | Prevents verbose output | -| Token limit guidance provided | Ensures succinct results | -| JSON structure required for structured results | Parseable output | -| "ONLY return" or equivalent constraint language | Prevents filler | + +| Check | Why It Matters | +| ----------------------------------------------- | ------------------------ | +| Subagent prompt specifies exact return format | Prevents verbose output | +| Token limit guidance provided | Ensures succinct results | +| JSON structure required for structured results | Parseable output | +| "ONLY return" or equivalent constraint language | Prevents filler | ### Subagent Chaining Constraint + **Subagents cannot spawn other subagents.** Chain through parent. 
### Result Aggregation Patterns -| Approach | When to Use | -|----------|-------------| -| Return to parent | Small results, immediate synthesis | -| Write to temp files | Large results (10+ items) | + +| Approach | When to Use | +| -------------------- | ------------------------------------- | +| Return to parent | Small results, immediate synthesis | +| Write to temp files | Large results (10+ items) | | Background subagents | Long-running, no clarification needed | --- @@ -74,16 +81,17 @@ Don't read files in parent when you could delegate the reading. ## Part 3: Agent-Specific Efficiency ### Memory Loading Strategy -| Check | Why It Matters | -|-------|----------------| -| Selective memory loading (only what's needed) | Loading all sidecar files wastes tokens | -| Index file loaded first for routing | Index tells what else to load | -| Memory sections loaded per-capability, not all-at-once | Each capability needs different memory | -| Access boundaries loaded on every activation | Required for security | + +| Check | Why It Matters | +| ------------------------------------------------------ | --------------------------------------- | +| Selective memory loading (only what's needed) | Loading all sidecar files wastes tokens | +| Index file loaded first for routing | Index tells what else to load | +| Memory sections loaded per-capability, not all-at-once | Each capability needs different memory | +| Access boundaries loaded on every activation | Required for security | ``` BAD: Load all memory -1. Read all files in _bmad/_memory/{skillName}-sidecar/ +1. Read all files in _bmad/memory/{skillName}-sidecar/ GOOD: Selective loading 1. 
Read index.md for configuration @@ -92,90 +100,45 @@ GOOD: Selective loading ``` ### Multi-Source Analysis Delegation -| Check | Why It Matters | -|-------|----------------| + +| Check | Why It Matters | +| ------------------------------------------- | ------------------------------------ | | 5+ source analysis uses subagent delegation | Each source adds thousands of tokens | -| Each source gets its own subagent | Parallel processing | -| Parent coordinates, doesn't read sources | Context stays lean | +| Each source gets its own subagent | Parallel processing | +| Parent coordinates, doesn't read sources | Context stays lean | ### Resource Loading Optimization -| Check | Why It Matters | -|-------|----------------| -| Resources loaded selectively by capability | Not all resources needed every time | -| Large resources loaded on demand | Reference tables only when needed | -| "Essential context" separated from "full reference" | Summary suffices for routing | + +| Check | Why It Matters | +| --------------------------------------------------- | ----------------------------------- | +| Resources loaded selectively by capability | Not all resources needed every time | +| Large resources loaded on demand | Reference tables only when needed | +| "Essential context" separated from "full reference" | Summary suffices for routing | --- ## Severity Guidelines -| Severity | When to Apply | -|----------|---------------| -| **Critical** | Circular dependencies, subagent-spawning-from-subagent | -| **High** | Parent-reads-before-delegating, sequential independent ops with 5+ items, loading all memory unnecessarily | -| **Medium** | Missed batching, subagent instructions without output format, resource loading inefficiency | -| **Low** | Minor parallelization opportunities (2-3 items), result aggregation suggestions | +| Severity | When to Apply | +| ------------ | ---------------------------------------------------------------------------------------------------------- | +| 
**Critical** | Circular dependencies, subagent-spawning-from-subagent | +| **High** | Parent-reads-before-delegating, sequential independent ops with 5+ items, loading all memory unnecessarily | +| **Medium** | Missed batching, subagent instructions without output format, resource loading inefficiency | +| **Low** | Minor parallelization opportunities (2-3 items), result aggregation suggestions | --- -## Output Format - -Output your findings using the universal schema defined in `references/universal-scan-schema.md`. - -Use EXACTLY these field names: `file`, `line`, `severity`, `category`, `title`, `detail`, `action`. Do not rename, restructure, or add fields to findings. - -Before writing output, verify: Is your array called `findings`? Does every item have `title`, `detail`, `action`? Is `assessments` an object, not items in the findings array? - -You will receive `{skill-path}` and `{quality-report-dir}` as inputs. - -Write JSON findings to: `{quality-report-dir}/execution-efficiency-temp.json` - -```json -{ - "scanner": "execution-efficiency", - "skill_path": "{path}", - "findings": [ - { - "file": "SKILL.md|{name}.md", - "line": 42, - "severity": "critical|high|medium|low|medium-opportunity", - "category": "sequential-independent|parent-reads-first|missing-batch|no-output-spec|subagent-chain-violation|memory-loading|resource-loading|missing-delegation|parallelization|batching|delegation|memory-optimization|resource-optimization", - "title": "Brief description", - "detail": "What it does now, and estimated time/token savings", - "action": "What it should do instead" - } - ], - "summary": { - "total_findings": 0, - "by_severity": {"critical": 0, "high": 0, "medium": 0, "low": 0}, - "by_category": {} - } -} -``` - -Merge all items into the single `findings[]` array: -- Former `issues[]` items: map `issue` to `title`, merge `current_pattern`+`estimated_savings` into `detail`, map `efficient_alternative` to `action` -- Former `opportunities[]` items: map 
`description` to `title`, merge details into `detail`, map `recommendation` to `action`, use severity like `medium-opportunity` +## Output -## Process +Write your analysis as a natural document. Include: -1. Read pre-pass JSON at `{quality-report-dir}/execution-deps-prepass.json` -2. Read SKILL.md for On Activation and operation flow patterns -3. Read all prompt files for execution patterns -4. Check memory loading strategy (selective vs all-at-once) -5. Check for parent-reading-before-delegating patterns -6. Verify subagent instructions have output specifications -7. Identify sequential operations that could be parallel -8. Check resource loading patterns -9. Write JSON to `{quality-report-dir}/execution-efficiency-temp.json` -10. Return only the filename: `execution-efficiency-temp.json` +- **Assessment** — overall efficiency verdict in 2-3 sentences +- **Key findings** — each with severity (critical/high/medium/low), affected file:line, current pattern, efficient alternative, and estimated savings. Critical = circular deps or subagent-from-subagent. High = parent-reads-before-delegating, sequential independent ops. Medium = missed batching, ordering issues. Low = minor opportunities. +- **Optimization opportunities** — larger structural changes with estimated impact +- **What's already efficient** — patterns worth preserving -## Critical After Draft Output +Be specific about file paths, line numbers, and savings estimates. The report creator will synthesize your analysis with other scanners' output. -Before finalizing, verify: -- Are "sequential-independent" findings truly independent? -- Are "parent-reads-first" findings actual context bloat or necessary prep? -- Are memory loading findings fair — does the agent actually load too much? -- Would implementing suggestions significantly improve efficiency? +Write your analysis to: `{quality-report-dir}/execution-efficiency-analysis.md` -Only after verification, write final JSON and return filename. 
+Return only the filename when complete. diff --git a/plugins/bmad/skills/bmad-agent-builder/quality-scan-prompt-craft.md b/plugins/bmad/skills/bmad-agent-builder/quality-scan-prompt-craft.md index ee41330..e5afe10 100644 --- a/plugins/bmad/skills/bmad-agent-builder/quality-scan-prompt-craft.md +++ b/plugins/bmad/skills/bmad-agent-builder/quality-scan-prompt-craft.md @@ -4,7 +4,7 @@ You are **PromptCraftBot**, a quality engineer who understands that great agent ## Overview -You evaluate the craft quality of an agent's prompts — SKILL.md and all capability prompts. This covers token efficiency, anti-patterns, outcome focus, and instruction clarity as a **unified assessment** rather than isolated checklists. The reason these must be evaluated together: a finding that looks like "waste" from a pure efficiency lens may be load-bearing persona context that enables the agent to stay in character and handle situations the prompt doesn't explicitly cover. Your job is to distinguish between the two. +You evaluate the craft quality of an agent's prompts — SKILL.md and all capability prompts. This covers token efficiency, anti-patterns, outcome-driven focus, and instruction clarity as a **unified assessment** rather than isolated checklists. The reason these must be evaluated together: a finding that looks like "waste" from a pure efficiency lens may be load-bearing persona context that enables the agent to stay in character and handle situations the prompt doesn't explicitly cover. Your job is to distinguish between the two. The guiding principle is an outcome-driven engineering focus. ## Your Role @@ -19,6 +19,7 @@ Read the pre-pass JSON first at `{quality-report-dir}/prompt-metrics-prepass.jso Pre-pass provides: line counts, token estimates, section inventories, waste pattern matches, back-reference matches, config headers, progression conditions.
Read raw files for judgment calls: + - `SKILL.md` — Overview quality, persona context assessment - `*.md` (prompt files at root) — Each capability prompt for craft quality - `references/*.md` — Progressive disclosure assessment @@ -40,11 +41,13 @@ A good agent Overview includes: | Design rationale | WHY specific approaches were chosen | Prevents "optimization" of important constraints | **When to flag Overview as excessive:** + - Exceeds ~10-12 sentences for a single-purpose agent - Same concept restated that also appears in Identity or Principles - Philosophical content disconnected from actual behavior **When NOT to flag:** + - Establishes persona context (even if "soft") - Defines domain concepts the agent operates on - Includes theory of mind guidance for user-facing agents @@ -52,21 +55,21 @@ A good agent Overview includes: ### SKILL.md Size & Progressive Disclosure -| Scenario | Acceptable Size | Notes | -|----------|----------------|-------| -| Multi-capability agent with brief capability sections | Up to ~250 lines | Each capability section brief, detail in prompt files | -| Single-purpose agent with deep persona | Up to ~500 lines (~5000 tokens) | Acceptable if content is genuinely needed | -| Agent with large reference tables or schemas inline | Flag for extraction | These belong in references/, not SKILL.md | +| Scenario | Acceptable Size | Notes | +| ----------------------------------------------------- | ------------------------------- | ----------------------------------------------------- | +| Multi-capability agent with brief capability sections | Up to ~250 lines | Each capability section brief, detail in prompt files | +| Single-purpose agent with deep persona | Up to ~500 lines (~5000 tokens) | Acceptable if content is genuinely needed | +| Agent with large reference tables or schemas inline | Flag for extraction | These belong in references/, not SKILL.md | ### Detecting Over-Optimization (Under-Contextualized Agents) -| Symptom | What It Looks 
Like | Impact | -|---------|-------------------|--------| -| Missing or empty Overview | Jumps to On Activation with no context | Agent follows steps mechanically | -| No persona framing | Instructions without identity context | Agent uses generic personality | -| No domain framing | References concepts without defining them | Agent uses generic understanding | -| Bare procedural skeleton | Only numbered steps with no connective context | Works for utilities, fails for persona agents | -| Missing "what good looks like" | No examples, no quality bar | Technically correct but characterless output | +| Symptom | What It Looks Like | Impact | +| ------------------------------ | ---------------------------------------------- | --------------------------------------------- | +| Missing or empty Overview | Jumps to On Activation with no context | Agent follows steps mechanically | +| No persona framing | Instructions without identity context | Agent uses generic personality | +| No domain framing | References concepts without defining them | Agent uses generic understanding | +| Bare procedural skeleton | Only numbered steps with no connective context | Works for utilities, fails for persona agents | +| Missing "what good looks like" | No examples, no quality bar | Technically correct but characterless output | --- @@ -75,31 +78,35 @@ A good agent Overview includes: Capability prompts (prompt `.md` files at skill root) are the working instructions for each capability. These should be more procedural than SKILL.md but maintain persona voice consistency. 
### Config Header -| Check | Why It Matters | -|-------|----------------| -| Has config header with language variables | Agent needs `{communication_language}` context | -| Uses bmad-init variables, not hardcoded values | Flexibility across projects | + +| Check | Why It Matters | +| ------------------------------------------- | ---------------------------------------------- | +| Has config header with language variables | Agent needs `{communication_language}` context | +| Uses config variables, not hardcoded values | Flexibility across projects | ### Self-Containment (Context Compaction Survival) -| Check | Why It Matters | -|-------|----------------| -| Prompt works independently of SKILL.md being in context | Context compaction may drop SKILL.md | -| No references to "as described above" or "per the overview" | Break when context compacts | -| Critical instructions in the prompt, not only in SKILL.md | Instructions only in SKILL.md may be lost | + +| Check | Why It Matters | +| ----------------------------------------------------------- | ----------------------------------------- | +| Prompt works independently of SKILL.md being in context | Context compaction may drop SKILL.md | +| No references to "as described above" or "per the overview" | Break when context compacts | +| Critical instructions in the prompt, not only in SKILL.md | Instructions only in SKILL.md may be lost | ### Intelligence Placement -| Check | Why It Matters | -|-------|----------------| -| Scripts handle deterministic operations | Faster, cheaper, reproducible | -| Prompts handle judgment calls | AI reasoning for semantic understanding | -| No script-based classification of meaning | If regex decides what content MEANS, that's wrong | -| No prompt-based deterministic operations | If a prompt validates structure, counts items, parses known formats, or compares against schemas — that work belongs in a script. 
Flag as `intelligence-placement` with a note that L6 (script-opportunities scanner) will provide detailed analysis | + +| Check | Why It Matters | +| ----------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Scripts handle deterministic operations | Faster, cheaper, reproducible | +| Prompts handle judgment calls | AI reasoning for semantic understanding | +| No script-based classification of meaning | If regex decides what content MEANS, that's wrong | +| No prompt-based deterministic operations | If a prompt validates structure, counts items, parses known formats, or compares against schemas — that work belongs in a script. Flag as `intelligence-placement` with a note that L6 (script-opportunities scanner) will provide detailed analysis | ### Context Sufficiency -| Check | When to Flag | -|-------|-------------| -| Judgment-heavy prompt with no context on what/why | Always — produces mechanical output | -| Interactive prompt with no user perspective | When capability involves communication | + +| Check | When to Flag | +| -------------------------------------------------- | --------------------------------------- | +| Judgment-heavy prompt with no context on what/why | Always — produces mechanical output | +| Interactive prompt with no user perspective | When capability involves communication | | Classification prompt with no criteria or examples | When prompt must distinguish categories | --- @@ -107,6 +114,7 @@ Capability prompts (prompt `.md` files at skill root) are the working instructio ## Part 3: Universal Craft Quality ### Genuine Token Waste + Flag these — always waste: | Pattern | Example | Fix | |---------|---------|-----| @@ -117,6 +125,7 @@ Flag these — always waste: | Conversational filler | "Let's think about..." 
| Delete or replace with direct instruction | ### Context That Looks Like Waste But Isn't (Agent-Specific) + Do NOT flag these: | Pattern | Why It's Valuable | |---------|-------------------| @@ -128,118 +137,79 @@ Do NOT flag these: | Warm/coaching tone for interactive agents | Affects the agent's personality expression | ### Outcome vs Implementation Balance -| Agent Type | Lean Toward | Rationale | -|------------|-------------|-----------| -| Simple utility agent | Outcome-focused | Just needs to know WHAT to produce | -| Domain expert agent | Outcome + domain context | Needs domain understanding for judgment | -| Companion/interactive agent | Outcome + persona + communication guidance | Needs to read user and adapt | -| Workflow facilitator agent | Outcome + rationale + selective HOW | Needs to understand WHY for routing | + +| Agent Type | Lean Toward | Rationale | +| --------------------------- | ------------------------------------------ | --------------------------------------- | +| Simple utility agent | Outcome-focused | Just needs to know WHAT to produce | +| Domain expert agent | Outcome + domain context | Needs domain understanding for judgment | +| Companion/interactive agent | Outcome + persona + communication guidance | Needs to read user and adapt | +| Workflow facilitator agent | Outcome + rationale + selective HOW | Needs to understand WHY for routing | + +### Pruning: Instructions the Agent Doesn't Need + +Beyond micro-step over-specification, check for entire blocks that teach the LLM something it already knows — or that repeat what the agent's persona context already establishes. The pruning test: **"Would the agent do this correctly given just its persona and the desired outcome?"** If yes, the block is noise. 
+ +**Flag as HIGH when a capability prompt contains any of these:** + +| Anti-Pattern | Why It's Noise | Example | +| -------------------------------------------------------- | --------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------- | +| Scoring formulas for subjective judgment | LLMs naturally assess relevance without numeric weights | "Score each option: relevance(×4) + novelty(×3)" | +| Capability prompt repeating identity/style from SKILL.md | The agent already has this context — repeating it wastes tokens | Capability prompt restating "You are a meticulous reviewer who..." | +| Step-by-step procedures for tasks the persona covers | The agent's personality and domain expertise handle this | "Step 1: greet warmly. Step 2: ask about their day. Step 3: transition to topic" | +| Per-platform adapter instructions | LLMs know their own platform's tools | Separate instructions for how to use subagents on different platforms | +| Template files explaining general capabilities | LLMs know how to format output, structure responses | A reference file explaining how to write a summary | +| Multiple capability files that could be one | Proliferation of files for what should be a single capability | 3 separate capabilities for "review code", "review tests", "review docs" when one "review" capability suffices | + +**Don't flag as over-specified:** + +- Domain-specific knowledge the agent genuinely needs (API conventions, project-specific rules) +- Design rationale that prevents undermining non-obvious constraints +- Persona-establishing context in SKILL.md (identity, style, principles — this is load-bearing, not waste) ### Structural Anti-Patterns -| Pattern | Threshold | Fix | -|---------|-----------|-----| -| Unstructured paragraph blocks | 8+ lines without headers or bullets | Break into sections | -| Suggestive reference loading | "See XYZ if needed" | 
Mandatory: "Load XYZ and apply criteria" | -| Success criteria that specify HOW | Listing implementation steps | Rewrite as outcome | + +| Pattern | Threshold | Fix | +| --------------------------------- | ----------------------------------- | ---------------------------------------- | +| Unstructured paragraph blocks | 8+ lines without headers or bullets | Break into sections | +| Suggestive reference loading | "See XYZ if needed" | Mandatory: "Load XYZ and apply criteria" | +| Success criteria that specify HOW | Listing implementation steps | Rewrite as outcome | ### Communication Style Consistency -| Check | Why It Matters | -|-------|----------------| -| Capability prompts maintain persona voice | Inconsistent voice breaks immersion | -| Tone doesn't shift between capabilities | Users expect consistent personality | + +| Check | Why It Matters | +| ------------------------------------------------- | ---------------------------------------- | +| Capability prompts maintain persona voice | Inconsistent voice breaks immersion | +| Tone doesn't shift between capabilities | Users expect consistent personality | | Examples in prompts match SKILL.md style guidance | Contradictory examples confuse the agent | --- ## Severity Guidelines -| Severity | When to Apply | -|----------|---------------| -| **Critical** | Missing progression conditions, self-containment failures, intelligence leaks into scripts | -| **High** | Pervasive defensive padding, SKILL.md over size guidelines with no progressive disclosure, over-optimized complex agent (empty Overview, no persona context), persona voice stripped to bare skeleton | -| **Medium** | Moderate token waste, over-specified procedures, minor voice inconsistency | -| **Low** | Minor verbosity, suggestive reference loading, style preferences | -| **Note** | Observations that aren't issues — e.g., "Persona context is appropriate" | +| Severity | When to Apply | +| ------------ | 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Critical** | Missing progression conditions, self-containment failures, intelligence leaks into scripts | +| **High** | Pervasive over-specification (scoring algorithms, capability prompts repeating persona context, adapter proliferation — see Pruning section), SKILL.md over size guidelines with no progressive disclosure, over-optimized complex agent (empty Overview, no persona context), persona voice stripped to bare skeleton | +| **Medium** | Moderate token waste, isolated over-specified procedures, minor voice inconsistency | +| **Low** | Minor verbosity, suggestive reference loading, style preferences | +| **Note** | Observations that aren't issues — e.g., "Persona context is appropriate" | + +**Effectiveness over efficiency:** Never recommend removing context that could degrade output quality, even if it saves significant tokens. Persona voice, domain framing, and design rationale are investments in quality, not waste. When in doubt about whether context is load-bearing, err on the side of keeping it. --- -## Output Format - -Output your findings using the universal schema defined in `references/universal-scan-schema.md`. - -Use EXACTLY these field names: `file`, `line`, `severity`, `category`, `title`, `detail`, `action`. Do not rename, restructure, or add fields to findings. - -Before writing output, verify: Is your array called `findings`? Does every item have `title`, `detail`, `action`? Is `assessments` an object, not items in the findings array? - -You will receive `{skill-path}` and `{quality-report-dir}` as inputs. 
- -Write JSON findings to: `{quality-report-dir}/prompt-craft-temp.json` - -```json -{ - "scanner": "prompt-craft", - "skill_path": "{path}", - "findings": [ - { - "file": "SKILL.md|{name}.md", - "line": 42, - "severity": "critical|high|medium|low|note", - "category": "token-waste|anti-pattern|outcome-balance|progression|self-containment|intelligence-placement|overview-quality|progressive-disclosure|under-contextualized|persona-voice|communication-consistency|inline-data", - "title": "Brief description", - "detail": "Why this matters for prompt craft. Include any nuance about why this might be intentional.", - "action": "Specific action to resolve" - } - ], - "assessments": { - "skill_type_assessment": "simple-utility|domain-expert|companion-interactive|workflow-facilitator", - "skillmd_assessment": { - "overview_quality": "appropriate|excessive|missing|disconnected", - "progressive_disclosure": "good|needs-extraction|monolithic", - "persona_context": "appropriate|excessive|missing", - "notes": "Brief assessment of SKILL.md craft" - }, - "prompts_scanned": 0, - "prompt_health": { - "prompts_with_config_header": 0, - "prompts_with_progression_conditions": 0, - "prompts_self_contained": 0, - "total_prompts": 0 - } - }, - "summary": { - "total_findings": 0, - "by_severity": {"critical": 0, "high": 0, "medium": 0, "low": 0, "note": 0}, - "assessment": "Brief 1-2 sentence assessment", - "top_improvement": "Highest-impact improvement" - } -} -``` - -## Process - -1. Read pre-pass JSON at `{quality-report-dir}/prompt-metrics-prepass.json` -2. Read SKILL.md — assess agent type, evaluate Overview quality, persona context -3. Read all prompt files at skill root -4. Check references/ for progressive disclosure -5. Evaluate Overview quality (present? appropriate? excessive? missing?) -6. Check for over-optimization — is this a complex agent stripped to bare skeleton? -7. Check size and progressive disclosure -8. 
For each capability prompt: config header, self-containment, context sufficiency -9. Scan for genuine token waste vs load-bearing persona context -10. Evaluate outcome vs implementation balance given agent type -11. Check intelligence placement -12. Check communication style consistency across prompts -13. Write JSON to `{quality-report-dir}/prompt-craft-temp.json` -14. Return only the filename: `prompt-craft-temp.json` - -## Critical After Draft Output - -Before finalizing, verify: -- Did I read pre-pass JSON and EVERY prompt file? -- For each "token-waste" finding: Is this genuinely wasteful, or load-bearing persona context? -- Am I flagging persona voice as waste? Re-evaluate — personality is investment for agents. -- Did I check for under-contextualization? -- Did I check communication style consistency? -- Would implementing ALL suggestions produce a better agent, or strip character? - -Only after verification, write final JSON and return filename. +## Output + +Write your analysis as a natural document. Include: + +- **Assessment** — overall craft verdict: skill type assessment, Overview quality, persona context quality, progressive disclosure, and a 2-3 sentence synthesis +- **Prompt health summary** — how many prompts have config headers, progression conditions, are self-contained +- **Per-capability craft** — for each capability file referenced in the routing table, briefly assess whether it follows outcome-driven principles and whether its voice aligns with the agent's persona. Flag capabilities that are over-specified or under-contextualized. +- **Key findings** — each with severity (critical/high/medium/low), affected file:line, what's wrong, why it matters, and how to fix it. Distinguish genuine waste from persona-serving context. +- **Strengths** — what's well-crafted (worth preserving) + +Write findings in order of severity. Be specific about file paths and line numbers. The report creator will synthesize your analysis with other scanners' output. 
+ +Write your analysis to: `{quality-report-dir}/prompt-craft-analysis.md` + +Return only the filename when complete. diff --git a/plugins/bmad/skills/bmad-agent-builder/quality-scan-script-opportunities.md b/plugins/bmad/skills/bmad-agent-builder/quality-scan-script-opportunities.md index 9e5de21..27dc486 100644 --- a/plugins/bmad/skills/bmad-agent-builder/quality-scan-script-opportunities.md +++ b/plugins/bmad/skills/bmad-agent-builder/quality-scan-script-opportunities.md @@ -15,6 +15,7 @@ Read every prompt file and SKILL.md. For each instruction that tells the LLM to ## Scan Targets Find and read: + - `SKILL.md` — On Activation patterns, inline operations - `*.md` (prompt files at root) — Each capability prompt for deterministic operations hiding in LLM instructions - `references/*.md` — Check if any resource content could be generated by scripts instead @@ -26,21 +27,23 @@ Find and read: For each operation in every prompt, ask: -| Question | If Yes | -|----------|--------| -| Given identical input, will this ALWAYS produce identical output? | Script candidate | -| Could you write a unit test with expected output for every input? | Script candidate | -| Does this require interpreting meaning, tone, context, or ambiguity? | Keep as prompt | -| Is this a judgment call that depends on understanding intent? | Keep as prompt | +| Question | If Yes | +| -------------------------------------------------------------------- | ---------------- | +| Given identical input, will this ALWAYS produce identical output? | Script candidate | +| Could you write a unit test with expected output for every input? | Script candidate | +| Does this require interpreting meaning, tone, context, or ambiguity? | Keep as prompt | +| Is this a judgment call that depends on understanding intent? | Keep as prompt | ## Script Opportunity Categories ### 1. 
Validation Operations + LLM instructions that check structure, format, schema compliance, naming conventions, required fields, or conformance to known rules. **Signal phrases in prompts:** "validate", "check that", "verify", "ensure format", "must conform to", "required fields" **Examples:** + - Checking frontmatter has required fields → Python script - Validating JSON against a schema → Python script with jsonschema - Verifying file naming conventions → Bash/Python script @@ -49,11 +52,13 @@ LLM instructions that check structure, format, schema compliance, naming convent - Access boundary format verification → Python script ### 2. Data Extraction & Parsing + LLM instructions that pull structured data from files without needing to interpret meaning. **Signal phrases:** "extract", "parse", "pull from", "read and list", "gather all" **Examples:** + - Extracting all {variable} references from markdown files → Python regex - Listing all files in a directory matching a pattern → Bash find/glob - Parsing YAML frontmatter from markdown → Python with pyyaml @@ -62,61 +67,70 @@ LLM instructions that pull structured data from files without needing to interpr - Parsing persona fields from SKILL.md → Python script ### 3. Transformation & Format Conversion + LLM instructions that convert between known formats without semantic judgment. **Signal phrases:** "convert", "transform", "format as", "restructure", "reformat" **Examples:** + - Converting markdown table to JSON → Python script - Restructuring JSON from one schema to another → Python script - Generating boilerplate from a template → Python/Bash script ### 4. Counting, Aggregation & Metrics + LLM instructions that count, tally, summarize numerically, or collect statistics. 
**Signal phrases:** "count", "how many", "total", "aggregate", "summarize statistics", "measure" **Examples:** + - Token counting per file → Python with tiktoken - Counting capabilities, prompts, or resources → Python script - File size/complexity metrics → Bash wc + Python - Memory file inventory and size tracking → Python script ### 5. Comparison & Cross-Reference + LLM instructions that compare two things for differences or verify consistency between sources. **Signal phrases:** "compare", "diff", "match against", "cross-reference", "verify consistency", "check alignment" **Examples:** -- Comparing manifest entries against actual files → Python script + - Diffing two versions of a document → git diff or Python difflib - Cross-referencing prompt names against SKILL.md references → Python script - Checking config variables are defined where used → Python regex scan -- Verifying menu codes are unique within the agent → Python script ### 6. Structure & File System Checks + LLM instructions that verify directory structure, file existence, or organizational rules. **Signal phrases:** "check structure", "verify exists", "ensure directory", "required files", "folder layout" **Examples:** + - Verifying agent folder has required files → Bash/Python script - Checking for orphaned files not referenced anywhere → Python script - Memory sidecar structure validation → Python script - Directory tree validation against expected layout → Python script ### 7. Dependency & Graph Analysis + LLM instructions that trace references, imports, or relationships between files. 
**Signal phrases:** "dependency", "references", "imports", "relationship", "graph", "trace" **Examples:** -- Building skill dependency graph from manifest → Python script + +- Building skill dependency graph → Python script - Tracing which resources are loaded by which prompts → Python regex - Detecting circular references → Python graph algorithm - Mapping capability → prompt file → resource file chains → Python script ### 8. Pre-Processing for LLM Capabilities (High-Value, Often Missed) + Operations where a script could extract compact, structured data from large files BEFORE the LLM reads them — reducing token cost and improving LLM accuracy. **This is the most creative category.** Look for patterns where the LLM reads a large file and then extracts specific information. A pre-pass script could do the extraction, giving the LLM a compact JSON summary instead of raw content. @@ -124,6 +138,7 @@ Operations where a script could extract compact, structured data from large file **Signal phrases:** "read and analyze", "scan through", "review all", "examine each" **Examples:** + - Pre-extracting file metrics (line counts, section counts, token estimates) → Python script feeding LLM scanner - Building a compact inventory of capabilities → Python script - Extracting all TODO/FIXME markers → grep/Python script @@ -131,12 +146,14 @@ Operations where a script could extract compact, structured data from large file - Pre-extracting memory system structure for validation → Python script ### 9. Post-Processing Validation (Often Missed) + Operations where a script could verify that LLM-generated output meets structural requirements AFTER the LLM produces it. 
**Examples:** + - Validating generated JSON against schema → Python jsonschema - Checking generated markdown has required sections → Python script -- Verifying generated manifest has required fields → Python script +- Verifying generated output has required fields → Python script --- @@ -144,17 +161,18 @@ Operations where a script could verify that LLM-generated output meets structura For each finding, estimate the "LLM Tax" — tokens spent per invocation on work a script could do for zero tokens. This makes findings concrete and prioritizable. -| LLM Tax Level | Tokens Per Invocation | Priority | -|---------------|----------------------|----------| -| Heavy | 500+ tokens on deterministic work | High severity | -| Moderate | 100-500 tokens on deterministic work | Medium severity | -| Light | <100 tokens on deterministic work | Low severity | +| LLM Tax Level | Tokens Per Invocation | Priority | +| ------------- | ------------------------------------ | --------------- | +| Heavy | 500+ tokens on deterministic work | High severity | +| Moderate | 100-500 tokens on deterministic work | Medium severity | +| Light | <100 tokens on deterministic work | Low severity | --- ## Your Toolbox Awareness Scripts are NOT limited to simple validation. They have access to: + - **Bash**: Full shell — `jq`, `grep`, `awk`, `sed`, `find`, `diff`, `wc`, `sort`, `uniq`, `curl`, piping, composition - **Python**: Full standard library (`json`, `yaml`, `pathlib`, `re`, `argparse`, `collections`, `difflib`, `ast`, `csv`, `xml`) plus PEP 723 inline-declared dependencies (`tiktoken`, `jsonschema`, `pyyaml`, `toml`, etc.) - **System tools**: `git` for history/diff/blame, filesystem operations, process execution @@ -167,96 +185,36 @@ Think broadly. 
A script that parses an AST, builds a dependency graph, extracts For each script opportunity found, also assess: -| Dimension | Question | -|-----------|----------| -| **Pre-pass potential** | Could this script feed structured data to an existing LLM scanner? | -| **Standalone value** | Would this script be useful as a lint check independent of the optimizer? | -| **Reuse across skills** | Could this script be used by multiple skills, not just this one? | +| Dimension | Question | +| ----------------------------- | ----------------------------------------------------------------------------------------------------------- | +| **Pre-pass potential** | Could this script feed structured data to an existing LLM scanner? | +| **Standalone value** | Would this script be useful as a lint check independent of quality analysis? | +| **Reuse across skills** | Could this script be used by multiple skills, not just this one? | | **--help self-documentation** | Prompts that invoke this script can use `--help` instead of inlining the interface — note the token savings | --- ## Severity Guidelines -| Severity | When to Apply | -|----------|---------------| -| **High** | Large deterministic operations (500+ tokens) in prompts — validation, parsing, counting, structure checks. Clear script candidates with high confidence. | -| **Medium** | Moderate deterministic operations (100-500 tokens), pre-processing opportunities that would improve LLM accuracy, post-processing validation. | -| **Low** | Small deterministic operations (<100 tokens), nice-to-have pre-pass scripts, minor format conversions. | +| Severity | When to Apply | +| ---------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **High** | Large deterministic operations (500+ tokens) in prompts — validation, parsing, counting, structure checks. Clear script candidates with high confidence. 
| +| **Medium** | Moderate deterministic operations (100-500 tokens), pre-processing opportunities that would improve LLM accuracy, post-processing validation. | +| **Low** | Small deterministic operations (<100 tokens), nice-to-have pre-pass scripts, minor format conversions. | --- -## Output Format - -Output your findings using the universal schema defined in `references/universal-scan-schema.md`. - -Use EXACTLY these field names: `file`, `line`, `severity`, `category`, `title`, `detail`, `action`. Do not rename, restructure, or add fields to findings. - -Before writing output, verify: Is your array called `findings`? Does every item have `title`, `detail`, `action`? Is `assessments` an object, not items in the findings array? - -You will receive `{skill-path}` and `{quality-report-dir}` as inputs. - -Write JSON findings to: `{quality-report-dir}/script-opportunities-temp.json` - -```json -{ - "scanner": "script-opportunities", - "skill_path": "{path}", - "findings": [ - { - "file": "SKILL.md|{name}.md", - "line": 42, - "severity": "high|medium|low", - "category": "validation|extraction|transformation|counting|comparison|structure|graph|preprocessing|postprocessing", - "title": "What the LLM is currently doing", - "detail": "Determinism confidence: certain|high|moderate. Estimated token savings: N per invocation. Implementation complexity: trivial|moderate|complex. Language: python|bash|either. Could be prepass: yes/no. Feeds scanner: name if applicable. Reusable across skills: yes/no. 
Help pattern savings: additional prompt tokens saved by using --help instead of inlining interface.", - "action": "What a script would do instead" - } - ], - "assessments": { - "existing_scripts": ["list of scripts that already exist in the agent's scripts/ folder"] - }, - "summary": { - "total_findings": 0, - "by_severity": {"high": 0, "medium": 0, "low": 0}, - "by_category": {}, - "assessment": "Brief assessment including total estimated token savings, the single highest-value opportunity, and how many findings could become pre-pass scripts for LLM scanners" - } -} -``` - -## Process - -1. Check `scripts/` directory — inventory what scripts already exist (avoid suggesting duplicates) -2. Read SKILL.md — check On Activation and inline operations for deterministic work -3. Read all prompt files — for each instruction, apply the determinism test -4. Read resource files — check if any resource content could be generated/validated by scripts -5. For each finding: estimate LLM tax, assess implementation complexity, check pre-pass potential -6. For each finding: consider the --help pattern — if a prompt currently inlines a script's interface, note the additional savings -7. Write JSON to `{quality-report-dir}/script-opportunities-temp.json` -8. Return only the filename: `script-opportunities-temp.json` - -## Critical After Draft Output - -Before finalizing, verify: - -### Determinism Accuracy -- For each finding: Is this TRULY deterministic, or does it require judgment I'm underestimating? -- Am I confusing "structured output" with "deterministic"? (An LLM summarizing in JSON is still judgment) -- Would the script actually produce the same quality output as the LLM? - -### Creativity Check -- Did I look beyond obvious validation? (Pre-processing and post-processing are often the highest-value opportunities) -- Did I consider the full toolbox? 
(Not just simple regex — ast parsing, dependency graphs, metric extraction) -- Did I check if any LLM step is reading large files when a script could extract the relevant parts first? - -### Practicality Check -- Are implementation complexity ratings realistic? -- Are token savings estimates reasonable? -- Would implementing the top findings meaningfully improve the agent's efficiency? -- Did I check for existing scripts to avoid duplicates? - -### Lane Check -- Am I staying in my lane? I find script opportunities — I don't evaluate prompt craft (L2), execution efficiency (L3), cohesion (L4), or creative enhancements (L5). - -Only after verification, write final JSON and return filename. +## Output + +Write your analysis as a natural document. Include: + +- **Existing scripts inventory** — what scripts already exist in the agent +- **Assessment** — overall verdict on intelligence placement in 2-3 sentences +- **Key findings** — deterministic operations found in prompts. Each with severity (high/medium/low based on LLM Tax: high = 500+ tokens, medium = 100-500, low = <100), affected file:line, what the LLM is currently doing, what a script would do instead, estimated token savings, and whether it could serve as a pre-pass +- **Aggregate savings** — total estimated token savings across all opportunities + +Be specific about file paths and line numbers. Think broadly about what scripts can accomplish. The report creator will synthesize your analysis with other scanners' output. + +Write your analysis to: `{quality-report-dir}/script-opportunities-analysis.md` + +Return only the filename when complete. 
diff --git a/plugins/bmad/skills/bmad-agent-builder/quality-scan-structure.md b/plugins/bmad/skills/bmad-agent-builder/quality-scan-structure.md index e7bceb2..8e4c16a 100644 --- a/plugins/bmad/skills/bmad-agent-builder/quality-scan-structure.md +++ b/plugins/bmad/skills/bmad-agent-builder/quality-scan-structure.md @@ -4,9 +4,9 @@ You are **StructureBot**, a quality engineer who validates the structural integr ## Overview -You validate that an agent's structure is complete, correct, and internally consistent. This covers SKILL.md structure, manifest alignment, capability cross-references, memory setup, identity quality, and logical consistency. **Why this matters:** Structural issues break agents at runtime — missing files, orphaned capabilities, and inconsistent identity make agents unreliable. +You validate that an agent's structure is complete, correct, and internally consistent. This covers SKILL.md structure, capability cross-references, memory setup, identity quality, and logical consistency. **Why this matters:** Structural issues break agents at runtime — missing files, orphaned capabilities, and inconsistent identity make agents unreliable. -This is a unified scan covering both *structure* (correct files, valid sections) and *capabilities* (manifest accuracy, capability-prompt alignment). These concerns are tightly coupled — you can't evaluate capability completeness without validating structural integrity. +This is a unified scan covering both _structure_ (correct files, valid sections) and _capabilities_ (capability-prompt alignment). These concerns are tightly coupled — you can't evaluate capability completeness without validating structural integrity. ## Your Role @@ -14,15 +14,16 @@ Read the pre-pass JSON first at `{quality-report-dir}/structure-capabilities-pre ## Scan Targets -Pre-pass provides: frontmatter validation, section inventory, template artifacts, capability cross-reference, manifest validation, memory path consistency. 
+Pre-pass provides: frontmatter validation, section inventory, template artifacts, capability cross-reference, memory path consistency. Read raw files ONLY for: + - Description quality assessment (is it specific enough to trigger reliably?) - Identity effectiveness (does the one-sentence identity prime behavior?) - Communication style quality (are examples good? do they match the persona?) - Principles quality (guiding vs generic platitudes?) - Logical consistency (does description match actual capabilities?) -- Activation sequence logical ordering (can't load manifest before config) +- Activation sequence logical ordering - Memory setup completeness for sidecar agents - Access boundaries adequacy - Headless mode setup if declared @@ -32,12 +33,11 @@ Read raw files ONLY for: ## Part 1: Pre-Pass Review Review all findings from `structure-capabilities-prepass.json`: + - Frontmatter issues (missing name, not kebab-case, missing description, no "Use when") - Missing required sections (Overview, Identity, Communication Style, Principles, On Activation) - Invalid sections (On Exit, Exiting) -- Template artifacts (orphaned {if-*}, {displayName}, etc.) -- Manifest validation issues (missing persona field, missing capabilities, duplicate menu codes) -- Capability cross-reference issues (orphaned prompts, missing prompt files) +- Template artifacts (orphaned {if-\*}, {displayName}, etc.) - Memory path inconsistencies - Directness pattern violations @@ -48,136 +48,108 @@ Include all pre-pass findings in your output, preserved as-is. 
These are determi ## Part 2: Judgment-Based Assessment ### Description Quality -| Check | Why It Matters | -|-------|----------------| -| Description is specific enough to trigger reliably | Vague descriptions cause false activations or missed activations | -| Description mentions key action verbs matching capabilities | Users invoke agents with action-oriented language | -| Description distinguishes this agent from similar agents | Ambiguous descriptions cause wrong-agent activation | -| Description follows two-part format: [5-8 word summary]. [trigger clause] | Standard format ensures consistent triggering behavior | -| Trigger clause uses quoted specific phrases ('create agent', 'optimize agent') | Specific phrases prevent false activations | + +| Check | Why It Matters | +| --------------------------------------------------------------------------------------------- | -------------------------------------------------------------------- | +| Description is specific enough to trigger reliably | Vague descriptions cause false activations or missed activations | +| Description mentions key action verbs matching capabilities | Users invoke agents with action-oriented language | +| Description distinguishes this agent from similar agents | Ambiguous descriptions cause wrong-agent activation | +| Description follows two-part format: [5-8 word summary]. 
[trigger clause] | Standard format ensures consistent triggering behavior | +| Trigger clause uses quoted specific phrases ('create agent', 'analyze agent') | Specific phrases prevent false activations | | Trigger clause is conservative (explicit invocation) unless organic activation is intentional | Most skills should only fire on direct requests, not casual mentions | ### Identity Effectiveness -| Check | Why It Matters | -|-------|----------------| -| Identity section provides a clear one-sentence persona | This primes the AI's behavior for everything that follows | -| Identity is actionable, not just a title | "You are a meticulous code reviewer" beats "You are CodeBot" | -| Identity connects to the agent's actual capabilities | Persona mismatch creates inconsistent behavior | + +| Check | Why It Matters | +| ------------------------------------------------------ | ------------------------------------------------------------ | +| Identity section provides a clear one-sentence persona | This primes the AI's behavior for everything that follows | +| Identity is actionable, not just a title | "You are a meticulous code reviewer" beats "You are CodeBot" | +| Identity connects to the agent's actual capabilities | Persona mismatch creates inconsistent behavior | ### Communication Style Quality -| Check | Why It Matters | -|-------|----------------| -| Communication style includes concrete examples | Without examples, style guidance is too abstract | -| Style matches the agent's persona and domain | A financial advisor shouldn't use casual gaming language | -| Style guidance is brief but effective | 3-5 examples beat a paragraph of description | + +| Check | Why It Matters | +| ---------------------------------------------- | -------------------------------------------------------- | +| Communication style includes concrete examples | Without examples, style guidance is too abstract | +| Style matches the agent's persona and domain | A financial advisor shouldn't use 
casual gaming language | +| Style guidance is brief but effective | 3-5 examples beat a paragraph of description | ### Principles Quality -| Check | Why It Matters | -|-------|----------------| -| Principles are guiding, not generic platitudes | "Be helpful" is useless; "Prefer concise answers over verbose explanations" is guiding | -| Principles relate to the agent's specific domain | Generic principles waste tokens | -| Principles create clear decision frameworks | Good principles help the agent resolve ambiguity | + +| Check | Why It Matters | +| ------------------------------------------------ | -------------------------------------------------------------------------------------- | +| Principles are guiding, not generic platitudes | "Be helpful" is useless; "Prefer concise answers over verbose explanations" is guiding | +| Principles relate to the agent's specific domain | Generic principles waste tokens | +| Principles create clear decision frameworks | Good principles help the agent resolve ambiguity | + +### Over-Specification of LLM Capabilities + +Agents should describe outcomes, not prescribe procedures for things the LLM does naturally. The agent's persona context (identity, communication style, principles) informs HOW — capability prompts should focus on WHAT to achieve. 
Flag these structural indicators: + +| Check | Why It Matters | Severity | +| ------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------- | +| Capability files that repeat identity/style already in SKILL.md | The agent already has persona context — repeating it in each capability wastes tokens and creates maintenance burden | MEDIUM per file, HIGH if pervasive | +| Multiple capability files doing essentially the same thing | Proliferation adds complexity without value — e.g., separate capabilities for "review code", "review tests", "review docs" when one "review" capability covers all | MEDIUM | +| Capability prompts with step-by-step procedures the persona would handle | The agent's expertise and communication style already guide execution — mechanical procedures override natural behavior | MEDIUM if isolated, HIGH if pervasive | +| Template or reference files explaining general LLM capabilities | Files that teach the LLM how to format output, use tools, or greet users — it already knows | MEDIUM | +| Per-platform adapter files or instructions | The LLM knows its own platform — multiple files for different platforms add tokens without preventing failures | HIGH | + +**Don't flag as over-specification:** + +- Domain-specific knowledge the agent genuinely needs +- Persona-establishing context in SKILL.md (identity, style, principles are load-bearing) +- Design rationale for non-obvious choices ### Logical Consistency -| Check | Why It Matters | -|-------|----------------| -| Description matches actual capabilities in manifest | Claiming capabilities that don't exist | -| Identity matches communication style | Identity says "formal expert" but style shows casual examples | -| Activation sequence is logically ordered | Config must load before manifest 
reads config vars | -| Capabilities referenced in prompts exist in manifest | Prompt references capability not in manifest | + +| Check | Why It Matters | +| ---------------------------------------- | ------------------------------------------------------------- | +| Identity matches communication style | Identity says "formal expert" but style shows casual examples | +| Activation sequence is logically ordered | Config must load before reading config vars | ### Memory Setup (Sidecar Agents) -| Check | Why It Matters | -|-------|----------------| -| Memory system file exists if agent declares sidecar | Sidecar without memory spec is incomplete | -| Access boundaries defined | Critical for autonomous agents especially | -| Memory paths consistent across all files | Different paths in different files break memory | -| Save triggers defined if memory persists | Without save triggers, memory never updates | + +| Check | Why It Matters | +| --------------------------------------------------- | ----------------------------------------------- | +| Memory system file exists if agent declares sidecar | Sidecar without memory spec is incomplete | +| Access boundaries defined | Critical for headless agents especially | +| Memory paths consistent across all files | Different paths in different files break memory | +| Save triggers defined if memory persists | Without save triggers, memory never updates | ### Headless Mode (If Declared) -| Check | Why It Matters | -|-------|----------------| -| Autonomous activation prompt exists | Agent declared autonomous but has no wake prompt | -| Default wake behavior defined | Agent won't know what to do without specific task | -| Autonomous tasks documented | Users need to know available tasks | + +| Check | Why It Matters | +| --------------------------------- | ------------------------------------------------- | +| Headless activation prompt exists | Agent declared headless but has no wake prompt | +| Default wake behavior defined | 
Agent won't know what to do without specific task | +| Headless tasks documented | Users need to know available tasks | --- ## Severity Guidelines -| Severity | When to Apply | -|----------|---------------| -| **Critical** | Missing SKILL.md, invalid frontmatter (no name), missing required sections, manifest missing or invalid, orphaned capabilities pointing to non-existent files | -| **High** | Description too vague to trigger, identity missing or ineffective, capabilities-manifest mismatch, memory setup incomplete for sidecar, activation sequence logically broken | -| **Medium** | Principles are generic, communication style lacks examples, minor consistency issues, headless mode incomplete | -| **Low** | Style refinement suggestions, principle strengthening opportunities | +| Severity | When to Apply | +| ------------ | -------------------------------------------------------------------------------------------------------------------------------------------- | +| **Critical** | Missing SKILL.md, invalid frontmatter (no name), missing required sections, orphaned capabilities pointing to non-existent files | +| **High** | Description too vague to trigger, identity missing or ineffective, memory setup incomplete for sidecar, activation sequence logically broken | +| **Medium** | Principles are generic, communication style lacks examples, minor consistency issues, headless mode incomplete | +| **Low** | Style refinement suggestions, principle strengthening opportunities | --- -## Output Format - -Output your findings using the universal schema defined in `references/universal-scan-schema.md`. - -Use EXACTLY these field names: `file`, `line`, `severity`, `category`, `title`, `detail`, `action`. Do not rename, restructure, or add fields to findings. - -Before writing output, verify: Is your array called `findings`? Does every item have `title`, `detail`, `action`? Is `assessments` an object, not items in the findings array? 
- -You will receive `{skill-path}` and `{quality-report-dir}` as inputs. - -Write JSON findings to: `{quality-report-dir}/structure-temp.json` - -```json -{ - "scanner": "structure", - "skill_path": "{path}", - "findings": [ - { - "file": "SKILL.md|bmad-manifest.json|{name}.md", - "line": 42, - "severity": "critical|high|medium|low", - "category": "frontmatter|sections|artifacts|manifest|capabilities|identity|communication-style|principles|consistency|memory-setup|headless-mode|activation-sequence", - "title": "Brief description", - "detail": "", - "action": "Specific action to resolve" - } - ], - "assessments": { - "sections_found": ["Overview", "Identity"], - "capabilities_count": 0, - "has_memory": false, - "has_headless": false, - "manifest_valid": true - }, - "summary": { - "total_findings": 0, - "by_severity": {"critical": 0, "high": 0, "medium": 0, "low": 0}, - "by_category": {}, - "assessment": "Brief 1-2 sentence assessment" - } -} -``` - -## Process - -1. Read pre-pass JSON at `{quality-report-dir}/structure-capabilities-prepass.json` -2. Include all pre-pass findings in output -3. Read SKILL.md for judgment-based assessment -4. Read bmad-manifest.json for capability evaluation -5. Read relevant prompt files for cross-reference quality -6. Assess description, identity, communication style, principles quality -7. Check logical consistency across all components -8. Check memory setup completeness if sidecar -9. Check headless mode setup if declared -10. Write JSON to `{quality-report-dir}/structure-temp.json` -11. Return only the filename: `structure-temp.json` - -## Critical After Draft Output - -Before finalizing, verify: -- Did I include ALL pre-pass findings? -- Did I read SKILL.md for judgment calls? -- Did I check logical consistency between description, identity, and capabilities? -- Are my severity ratings appropriate? -- Would implementing my suggestions improve the agent? - -Only after verification, write final JSON and return filename. 
+## Output + +Write your analysis as a natural document. Include: + +- **Assessment** — overall structural verdict in 2-3 sentences +- **Sections found** — which required/optional sections are present +- **Capabilities inventory** — list each capability with its routing, noting any structural issues per capability +- **Key findings** — each with severity (critical/high/medium/low), affected file:line, what's wrong, and how to fix it +- **Strengths** — what's structurally sound (worth preserving) +- **Memory & headless status** — whether these are set up and correctly configured + +For each capability referenced in the routing table, confirm the target file exists and note any structural issues. This per-capability view feeds the capability dashboard in the final report. + +Write your analysis to: `{quality-report-dir}/structure-analysis.md` + +Return only the filename when complete. diff --git a/plugins/bmad/skills/bmad-agent-builder/references/metadata-reference.md b/plugins/bmad/skills/bmad-agent-builder/references/metadata-reference.md deleted file mode 100644 index 4a0b7e7..0000000 --- a/plugins/bmad/skills/bmad-agent-builder/references/metadata-reference.md +++ /dev/null @@ -1,126 +0,0 @@ -# Manifest Reference - -Every BMad skill has a `bmad-manifest.json` at its root. This is the unified format for agents, workflows, and simple skills. - -## File Location - -``` -{skillname}/ -├── SKILL.md # name, description, persona content -├── bmad-manifest.json # Capabilities, module integration, persona distillate -└── ... -``` - -## SKILL.md Frontmatter (Minimal) - -```yaml ---- -name: bmad-{modulecode}-{skillname} -description: [5-8 word summary]. [Use when user says 'X' or 'Y'.] ---- -``` - -## bmad-manifest.json - -**NOTE:** Do NOT include `$schema` in generated manifests. The schema is used by validation tooling only — it is not part of the delivered skill. 
- -```json -{ - "module-code": "bmb", - "replaces-skill": "bmad-original-agent", - "persona": "A succinct distillation of who this agent is and how they operate.", - "has-memory": true, - "capabilities": [ - { - "name": "build", - "menu-code": "BP", - "description": "Builds agents through conversational discovery. Outputs to skill folder.", - "supports-headless": true, - "prompt": "build-process.md", - "phase-name": "anytime", - "after": ["create-prd"], - "before": [], - "is-required": false, - "output-location": "{bmad_builder_output_folder}" - }, - { - "name": "external-tool", - "menu-code": "ET", - "description": "Delegates to another registered skill.", - "supports-headless": false, - "skill-name": "bmad-some-other-skill" - } - ] -} -``` - -## Field Reference - -### Top-Level Fields - -| Field | Type | Required | Purpose | -|-------|------|----------|---------| -| `module-code` | string | If module | Short code for namespacing (e.g., `bmb`, `cis`) | -| `replaces-skill` | string | No | Registered skill name this replaces. Inherits metadata during bmad-init. | -| `persona` | string | Agents only | Succinct distillation of the agent's essence. 
**Presence = this is an agent.** | -| `has-memory` | boolean | No | Whether state persists across sessions via sidecar memory | - -### Capability Fields - -| Field | Type | Required | Purpose | -|-------|------|----------|---------| -| `name` | string | Yes | Kebab-case identifier | -| `menu-code` | string | Yes | 2-3 uppercase letter shortcut for menus | -| `description` | string | Yes | What it does and when to suggest it | -| `supports-autonomous` | boolean | No | Can run without user interaction | -| `prompt` | string | No | Relative path to prompt file (internal capability) | -| `skill-name` | string | No | Registered name of external skill (external capability) | -| `phase-name` | string | No | Module phase this belongs to | -| `after` | array | No | Skill names that should run before this capability | -| `before` | array | No | Skill names this capability should run before | -| `is-required` | boolean | No | If true, skills in `before` are blocked until this completes | -| `output-location` | string | No | Where output goes (may use config variables) | - -### Three Capability Flavors - -1. **Has `prompt`** — internal capability routed to a prompt file -2. **Has `skill-name`** — delegates to another registered skill -3. **Has neither** — SKILL.md handles it directly - -### The `replaces-skill` Field - -When set, the skill inherits metadata from the replaced skill during `bmad-init`. Explicit fields in the new manifest override inherited values. - -## Agent vs Workflow vs Skill - -No type field needed — inferred from content: -- **Has `persona`** → agent -- **No `persona`** → workflow or skill (distinction is complexity, not manifest structure) - -## Config Loading - -All module skills MUST use the `bmad-init` skill at startup. - -## Path Construction Rules — CRITICAL - -Only use `{project-root}` for `_bmad` paths. 
- -**Three path types:** -- **Skill-internal** — bare relative paths (no prefix) -- **Project `_bmad` paths** — always `{project-root}/_bmad/...` -- **Config variables** — used directly, already contain `{project-root}` in their resolved values - -**Correct:** -``` -references/reference.md # Skill-internal (bare relative) -capability.md # Skill-internal (bare relative) -{project-root}/_bmad/_memory/x-sidecar/ # Project _bmad path -{output_folder}/report.md # Config var (already has full path) -``` - -**Never use:** -``` -../../other-skill/file.md # Cross-skill relative path breaks with reorganization -{project-root}/{config_var}/output.md # Double-prefix -./references/reference.md # Relative prefix breaks context changes -``` diff --git a/plugins/bmad/skills/bmad-agent-builder/references/quality-dimensions.md b/plugins/bmad/skills/bmad-agent-builder/references/quality-dimensions.md index 064d17c..cec2a50 100644 --- a/plugins/bmad/skills/bmad-agent-builder/references/quality-dimensions.md +++ b/plugins/bmad/skills/bmad-agent-builder/references/quality-dimensions.md @@ -1,46 +1,54 @@ # Quality Dimensions — Quick Reference -Six dimensions to keep in mind when building agent skills. The quality scanners check these automatically during optimization — this is a mental checklist for the build phase. +Seven dimensions to keep in mind when building agent skills. The quality scanners check these automatically during quality analysis — this is a mental checklist for the build phase. -## 1. Informed Autonomy +## 1. Outcome-Driven Design + +Describe what each capability achieves, not how to do it step by step. The agent's persona context (identity, communication style, principles) informs HOW — capability prompts just need the WHAT. + +- **The test:** Would removing this instruction cause the agent to produce a worse outcome? If the agent would do it anyway given its persona and the desired outcome, the instruction is noise. 
+- **Pruning:** If a capability prompt teaches the LLM something it already knows — or repeats guidance already in the agent's identity/style — cut it. +- **When procedure IS value:** Exact script invocations, specific file paths, API calls, security-critical operations. These need low freedom. + +## 2. Informed Autonomy The executing agent needs enough context to make judgment calls when situations don't match the script. The Overview section establishes this: domain framing, theory of mind, design rationale. - Simple agents with 1-2 capabilities need minimal context - Agents with memory, autonomous mode, or complex capabilities need domain understanding, user perspective, and rationale for non-obvious choices -- When in doubt, explain *why* — an agent that understands the mission improvises better than one following blind steps +- When in doubt, explain _why_ — an agent that understands the mission improvises better than one following blind steps -## 2. Intelligence Placement +## 3. Intelligence Placement Scripts handle plumbing (fetch, transform, validate). Prompts handle judgment (interpret, classify, decide). -**Test:** If a script contains an `if` that decides what content *means*, intelligence has leaked. +**Test:** If a script contains an `if` that decides what content _means_, intelligence has leaked. -**Reverse test:** If a prompt validates structure, counts items, parses known formats, compares against schemas, or checks file existence — determinism has leaked into the LLM. That work belongs in a script. Scripts have access to full bash, Python with standard library plus PEP 723 dependencies, and system tools — think broadly about what can be offloaded. +**Reverse test:** If a prompt validates structure, counts items, parses known formats, compares against schemas, or checks file existence — determinism has leaked into the LLM. That work belongs in a script. -## 3. Progressive Disclosure +## 4. Progressive Disclosure SKILL.md stays focused. 
Detail goes where it belongs. -- Capability instructions → prompt files at skill root -- Reference data, schemas, large tables → `references/` -- Templates, starter files → `assets/` -- Memory discipline → `references/memory-system.md` +- Capability instructions → `./references/` +- Reference data, schemas, large tables → `./references/` +- Templates, starter files → `./assets/` +- Memory discipline → `./references/memory-system.md` - Multi-capability SKILL.md under ~250 lines: fine as-is - Single-purpose up to ~500 lines: acceptable if focused -## 4. Description Format +## 5. Description Format Two parts: `[5-8 word summary]. [Use when user says 'X' or 'Y'.]` -Default to conservative triggering. See `references/standard-fields.md` for full format and examples. +Default to conservative triggering. See `./references/standard-fields.md` for full format. -## 5. Path Construction +## 6. Path Construction -Only use `{project-root}` for `_bmad` paths. Config variables used directly — they already contain `{project-root}`. +Use `{project-root}` for any project-scope path. Use `./` for skill-internal paths. Config variables used directly — they already contain `{project-root}`. -See `references/standard-fields.md` for correct/incorrect patterns. +See `./references/standard-fields.md` for correct/incorrect patterns. -## 6. Token Efficiency +## 7. Token Efficiency -Remove genuine waste (repetition, defensive padding, meta-explanation). Preserve context that enables judgment (domain framing, theory of mind, design rationale). These are different things — the prompt-craft scanner distinguishes between them. +Remove genuine waste (repetition, defensive padding, meta-explanation). Preserve context that enables judgment (persona voice, domain framing, theory of mind, design rationale). These are different things — never trade effectiveness for efficiency. A capability that works correctly but uses extra tokens is always better than one that's lean but fails edge cases. 
diff --git a/plugins/bmad/skills/bmad-agent-builder/references/script-opportunities-reference.md b/plugins/bmad/skills/bmad-agent-builder/references/script-opportunities-reference.md index fecbed0..360d98c 100644 --- a/plugins/bmad/skills/bmad-agent-builder/references/script-opportunities-reference.md +++ b/plugins/bmad/skills/bmad-agent-builder/references/script-opportunities-reference.md @@ -17,16 +17,20 @@ Scripts validate structure and syntax (deterministic). Prompts evaluate semantic During build, walk through every capability/operation and apply these tests: ### The Determinism Test + For each operation the agent performs, ask: + - Given identical input, will this ALWAYS produce identical output? → Script - Does this require interpreting meaning, tone, context, or ambiguity? → Prompt - Could you write a unit test with expected output for every input? → Script ### The Judgment Boundary + Scripts handle: fetch, transform, validate, count, parse, compare, extract, format, check structure Prompts handle: interpret, classify with ambiguity, create, decide with incomplete info, evaluate quality, synthesize meaning ### Pattern Recognition Checklist + Table of signal verbs/patterns mapping to script types: | Signal Verb/Pattern | Script Type | |---------------------|-------------| @@ -41,21 +45,26 @@ Table of signal verbs/patterns mapping to script types: | "graph", "map dependencies" | Dependency analysis script | ### The Outside-the-Box Test + Beyond obvious validation, consider: + - Could any data gathering step be a script that returns structured JSON for the LLM to interpret? - Could pre-processing reduce what the LLM needs to read? - Could post-processing validate what the LLM produced? - Could metric collection feed into LLM decision-making without the LLM doing the counting? 
### Your Toolbox -Scripts have access to full capabilities — think broadly: -- **Bash**: Full shell — `jq`, `grep`, `awk`, `sed`, `find`, `diff`, `wc`, `sort`, `uniq`, `curl`, plus piping and composition -- **Python**: Standard library (`json`, `yaml`, `pathlib`, `re`, `argparse`, `collections`, `difflib`, `ast`, `csv`, `xml`, etc.) plus PEP 723 inline-declared dependencies (`tiktoken`, `jsonschema`, `pyyaml`, etc.) -- **System tools**: `git` commands for history/diff/blame, filesystem operations, process execution + +**Python is the default** for all script logic (cross-platform: macOS, Linux, Windows/WSL). See `./references/script-standards.md` for full rationale and safe bash commands. + +- **Python:** Standard library (`json`, `pathlib`, `re`, `argparse`, `collections`, `difflib`, `ast`, `csv`, `xml`, etc.) plus PEP 723 inline-declared dependencies (`tiktoken`, `jsonschema`, `pyyaml`, etc.) +- **Safe shell commands:** `git`, `gh`, `uv run`, `npm`/`npx`/`pnpm`, `mkdir -p` +- **Avoid bash for logic** — no piping, `jq`, `grep`, `sed`, `awk`, `find`, `diff`, `wc` in scripts. Use Python equivalents instead. If you can express the logic as deterministic code, it's a script candidate. ### The --help Pattern + All scripts use PEP 723 and `--help`. When a skill's prompt needs to invoke a script, it can say "Run `scripts/foo.py --help` to understand inputs/outputs, then invoke appropriately" instead of inlining the script's interface. This saves tokens in prompts and keeps a single source of truth for the script's API. --- @@ -69,6 +78,7 @@ All scripts use PEP 723 and `--help`. When a skill's prompt needs to invoke a sc **Why:** Frontmatter is the #1 factor in skill triggering. Catch errors early. **Checks:** + ```python # checks: - name exists and is kebab-case @@ -83,19 +93,7 @@ All scripts use PEP 723 and `--help`. When a skill's prompt needs to invoke a sc --- -### 2. 
Manifest Schema Validator - -**Status:** ✅ Already exists at `scripts/manifest.py` (create, add-capability, update, read, validate) - -**Enhancement opportunities:** -- Add `--agent-path` flag for auto-discovery -- Check menu code uniqueness within agent -- Verify prompt files exist for `type: "prompt"` capabilities -- Verify external skill names are registered (could check against skill registry) - ---- - -### 3. Template Artifact Scanner +### 2. Template Artifact Scanner **What:** Scan for orphaned template substitution artifacts @@ -107,19 +105,20 @@ All scripts use PEP 723 and `--help`. When a skill's prompt needs to invoke a sc --- -### 4. Access Boundaries Extractor +### 3. Access Boundaries Extractor **What:** Extract and validate access boundaries from memory-system.md **Why:** Security critical — must be defined before file operations **Checks:** + ```python # Parse memory-system.md for: - ## Read Access section exists - ## Write Access section exists - ## Deny Zones section exists (can be empty) -- Paths use placeholders correctly ({project-root} for _bmad paths, relative for skill-internal) +- Paths use placeholders correctly ({project-root} for project-scope paths, ./ for skill-internal) ``` **Output:** Structured JSON of read/write/deny zones @@ -128,36 +127,18 @@ All scripts use PEP 723 and `--help`. When a skill's prompt needs to invoke a sc --- -### 5. Prompt Frontmatter Comparator - -**What:** Compare prompt file frontmatter against bmad-manifest.json - -**Why:** Capability misalignment causes runtime errors - -**Checks:** -```python -# For each prompt .md file at skill root: -- Has frontmatter (name, description, menu-code) -- name matches manifest capability name -- menu-code matches manifest (case-insensitive) -- description is present -``` - -**Output:** JSON with mismatches, missing files - -**Implementation:** Python, reads bmad-manifest.json and all prompt .md files at skill root - --- ## Priority 2: Analysis Scripts -### 6. 
Token Counter +### 4. Token Counter **What:** Count tokens in each file of an agent **Why:** Identify verbose files that need optimization **Checks:** + ```python # For each .md file: - Total tokens (approximate: chars / 4) @@ -171,16 +152,17 @@ All scripts use PEP 723 and `--help`. When a skill's prompt needs to invoke a sc --- -### 7. Dependency Graph Generator +### 5. Dependency Graph Generator **What:** Map skill → external skill dependencies **Why:** Understand agent's dependency surface **Checks:** + ```python -# Parse bmad-manifest.json for external skills # Parse SKILL.md for skill invocation patterns +# Parse prompt files for external skill references # Build dependency graph ``` @@ -190,24 +172,15 @@ All scripts use PEP 723 and `--help`. When a skill's prompt needs to invoke a sc --- -### 8. Activation Flow Analyzer +### 6. Activation Flow Analyzer **What:** Parse SKILL.md On Activation section for sequence **Why:** Validate activation order matches best practices **Checks:** -```python -# Look for steps in order: -1. Activation mode detection -2. Config loading -3. First-run check -4. Access boundaries load -5. Memory load -6. Manifest load -7. Greet -8. Present menu -``` + +Validate that the activation sequence is logically ordered (e.g., config loads before config is used, memory loads before memory is referenced). **Output:** JSON with detected steps, missing steps, out-of-order warnings @@ -215,13 +188,14 @@ All scripts use PEP 723 and `--help`. When a skill's prompt needs to invoke a sc --- -### 9. Memory Structure Validator +### 7. Memory Structure Validator **What:** Validate memory-system.md structure **Why:** Memory files have specific requirements **Checks:** + ```python # Required sections: - ## Core Principle @@ -236,13 +210,14 @@ All scripts use PEP 723 and `--help`. When a skill's prompt needs to invoke a sc --- -### 10. Subagent Pattern Detector +### 8. 
Subagent Pattern Detector **What:** Detect if agent uses BMAD Advanced Context Pattern **Why:** Agents processing 5+ sources MUST use subagents **Checks:** + ```python # Pattern detection in SKILL.md: - "DO NOT read sources yourself" @@ -259,7 +234,7 @@ All scripts use PEP 723 and `--help`. When a skill's prompt needs to invoke a sc ## Priority 3: Composite Scripts -### 11. Agent Health Check +### 9. Agent Health Check **What:** Run all validation scripts and aggregate results @@ -273,13 +248,14 @@ All scripts use PEP 723 and `--help`. When a skill's prompt needs to invoke a sc --- -### 12. Comparison Validator +### 10. Comparison Validator **What:** Compare two versions of an agent for differences **Why:** Validate changes during iteration **Checks:** + ```bash # Git diff with structure awareness: - Frontmatter changes @@ -309,7 +285,7 @@ All scripts MUST output structured JSON for agent consumption: { "severity": "critical|high|medium|low|info", "category": "structure|security|performance|consistency", - "location": {"file": "SKILL.md", "line": 42}, + "location": { "file": "SKILL.md", "line": 42 }, "issue": "Clear description", "fix": "Specific action to resolve" } @@ -342,20 +318,20 @@ When creating validation scripts: --- -## Integration with Quality Optimizer +## Integration with Quality Analysis -The Quality Optimizer should: +The Quality Analysis skill should: 1. **First**: Run available scripts for fast, deterministic checks 2. **Then**: Use sub-agents for semantic analysis (requires judgment) 3. 
**Finally**: Synthesize both sources into report **Example flow:** + ```bash # Run all validation scripts python scripts/validate-frontmatter.py --agent-path {path} bash scripts/scan-template-artifacts.sh --agent-path {path} -python scripts/compare-prompts-manifest.py --agent-path {path} # Collect JSON outputs # Spawn sub-agents only for semantic checks @@ -367,19 +343,12 @@ python scripts/compare-prompts-manifest.py --agent-path {path} ## Script Creation Priorities **Phase 1 (Immediate value):** + 1. Template Artifact Scanner (Bash + jq) -2. Prompt Frontmatter Comparator (Python) -3. Access Boundaries Extractor (Python) +2. Access Boundaries Extractor (Python) -**Phase 2 (Enhanced validation):** -4. Token Counter (Python) -5. Subagent Pattern Detector (Python) -6. Activation Flow Analyzer (Python) +**Phase 2 (Enhanced validation):** 3. Token Counter (Python) 4. Subagent Pattern Detector (Python) 5. Activation Flow Analyzer (Python) -**Phase 3 (Advanced features):** -7. Dependency Graph Generator (Python) -8. Memory Structure Validator (Python) -9. Agent Health Check orchestrator (Bash) +**Phase 3 (Advanced features):** 6. Dependency Graph Generator (Python) 7. Memory Structure Validator (Python) 8. Agent Health Check orchestrator (Bash) -**Phase 4 (Comparison tools):** -10. Comparison Validator (Bash + Python) +**Phase 4 (Comparison tools):** 9. Comparison Validator (Bash + Python) diff --git a/plugins/bmad/skills/bmad-agent-builder/references/script-standards.md b/plugins/bmad/skills/bmad-agent-builder/references/script-standards.md new file mode 100644 index 0000000..0c7cc2d --- /dev/null +++ b/plugins/bmad/skills/bmad-agent-builder/references/script-standards.md @@ -0,0 +1,92 @@ +# Script Creation Standards + +When building scripts for a skill, follow these standards to ensure portability and zero-friction execution. Skills must work across macOS, Linux, and Windows (native, Git Bash, and WSL). 
+ +## Python Over Bash + +**Always favor Python for script logic.** Bash is not portable — it fails or behaves inconsistently on Windows (Git Bash is MSYS2-based, not a full Linux shell; WSL bash can conflict with Git Bash on PATH; PowerShell is a different language entirely). Python with `uv run` works identically on all platforms. + +**Safe bash commands** — these work reliably across all environments and are fine to use directly: + +- `git`, `gh` — version control and GitHub CLI +- `uv run` — Python script execution with automatic dependency handling +- `npm`, `npx`, `pnpm` — Node.js ecosystem +- `mkdir -p` — directory creation + +**Everything else should be Python** — piping, `jq`, `grep`, `sed`, `awk`, `find`, `diff`, `wc`, and any non-trivial logic. Even `sed -i` behaves differently on macOS vs Linux. If it's more than a single safe command, write a Python script. + +## Favor the Standard Library + +Always prefer Python's standard library over external dependencies. The stdlib is pre-installed everywhere, requires no `uv run`, and has zero supply-chain risk. Common stdlib modules that cover most script needs: + +- `json` — JSON parsing and output +- `pathlib` — cross-platform path handling +- `re` — pattern matching +- `argparse` — CLI interface +- `collections` — counters, defaultdicts +- `difflib` — text comparison +- `ast` — Python source analysis +- `csv`, `xml.etree` — data formats + +Only pull in external dependencies when the stdlib genuinely cannot do the job (e.g., `tiktoken` for accurate token counting, `pyyaml` for YAML parsing, `jsonschema` for schema validation). **External dependencies must be confirmed with the user during the build process** — they add install-time cost, supply-chain surface, and require `uv` to be available. + +## PEP 723 Inline Metadata (Required) + +Every Python script MUST include a PEP 723 metadata block. 
For scripts with external dependencies, use the `uv run` shebang: + +```python +#!/usr/bin/env -S uv run --script +# /// script +# requires-python = ">=3.10" +# dependencies = ["pyyaml>=6.0", "jsonschema>=4.0"] +# /// +``` + +For scripts using only the standard library, use a plain Python shebang but still include the metadata block: + +```python +#!/usr/bin/env python3 +# /// script +# requires-python = ">=3.10" +# /// +``` + +**Key rules:** + +- The shebang MUST be line 1 — before the metadata block +- Always include `requires-python` +- List all external dependencies with version constraints +- Never use `requirements.txt`, `pip install`, or expect global package installs +- The shebang is a Unix convenience — cross-platform invocation relies on `uv run scripts/foo.py`, not `./scripts/foo.py` + +## Invocation in SKILL.md + +How a built skill's SKILL.md should reference its scripts: + +- **Scripts with external dependencies:** `uv run scripts/analyze.py {args}` +- **Stdlib-only scripts:** `python3 scripts/scan.py {args}` (also fine to use `uv run` for consistency) + +`uv run` reads the PEP 723 metadata, silently caches dependencies in an isolated environment, and runs the script — no user prompt, no global install. Like `npx` for Python. + +## Graceful Degradation + +Skills may run in environments where Python or `uv` is unavailable (e.g., claude.ai web). Scripts should be the fast, reliable path — but the skill must still deliver its outcome when execution is not possible. + +**Pattern:** When a script cannot execute, the LLM performs the equivalent work directly. The script's `--help` documents what it checks, making this fallback natural. Design scripts so their logic is understandable from their help output and the skill's context. 
+ +In SKILL.md, frame script steps as outcomes, not just commands: + +- Good: "Validate path conventions (run `scripts/scan-paths.py --help` for details)" +- Avoid: "Execute `python3 scripts/scan-paths.py`" with no context about what it does + +## Script Interface Standards + +- Implement `--help` via `argparse` (single source of truth for the script's API) +- Accept target path as a positional argument +- `-o` flag for output file (default to stdout) +- Diagnostics and progress to stderr +- Exit codes: 0=pass, 1=fail, 2=error +- `--verbose` flag for debugging +- Output valid JSON to stdout +- No interactive prompts, no network dependencies +- Tests in `scripts/tests/` diff --git a/plugins/bmad/skills/bmad-agent-builder/references/skill-best-practices.md b/plugins/bmad/skills/bmad-agent-builder/references/skill-best-practices.md index 67cdeb3..7353c10 100644 --- a/plugins/bmad/skills/bmad-agent-builder/references/skill-best-practices.md +++ b/plugins/bmad/skills/bmad-agent-builder/references/skill-best-practices.md @@ -1,218 +1,109 @@ # Skill Authoring Best Practices -Practical patterns for writing effective BMad agent skills. For field definitions and description format, see `references/standard-fields.md`. For quality dimensions, see `references/quality-dimensions.md`. - -## Core Principle: Informed Autonomy - -Give the executing agent enough context to make good judgment calls — not just enough to follow steps. The right test for every piece of content is: "Would the agent make *better decisions* with this context?" If yes, keep it. If it's genuinely redundant or mechanical, cut it. 
- -## Freedom Levels - -Match specificity to task fragility: - -| Freedom | When to Use | Example | -|---------|-------------|---------| -| **High** (text instructions) | Multiple valid approaches, context-dependent | "Analyze the user's vision and suggest capabilities" | -| **Medium** (pseudocode/templates) | Preferred pattern exists, some variation OK | `def generate_manifest(capabilities, format="json"):` | -| **Low** (exact scripts) | Fragile operations, consistency critical | `python3 scripts/manifest.py validate path/to/skill` (do not modify) | - -**Analogy**: Narrow bridge with cliffs = low freedom. Open field = high freedom. - -## Common Patterns - -### Template Pattern - -**Strict** (must follow exactly): -````markdown -## Report structure -ALWAYS use this template: -```markdown -# [Title] -## Summary -[One paragraph] -## Findings -- Finding 1 with data -``` -```` - -**Flexible** (adapt as needed): -````markdown -Here's a sensible default, use judgment: -```markdown -# [Title] -## Summary -[Overview] -``` -Adapt based on context. -```` - -### Examples Pattern - -Input/output pairs show expected style: -````markdown -## Commit message format -**Example 1:** -Input: "Added user authentication with JWT tokens" -Output: `feat(auth): implement JWT-based authentication` -```` - -### Conditional Workflow - -```markdown -1. Determine modification type: - **Creating new?** → Creation workflow - **Editing existing?** → Editing workflow -``` +For field definitions and description format, see `./standard-fields.md`. For quality dimensions, see `./quality-dimensions.md`. -### Soft Gate Elicitation +## Core Philosophy: Outcome-Based Authoring -For guided/interactive workflows, use "anything else?" soft gates at natural transition points instead of hard menus. This pattern draws out information users didn't know they had: +Skills should describe **what to achieve**, not **how to achieve it**. 
The LLM is capable of figuring out the approach — it needs to know the goal, the constraints, and the why. -```markdown -## After completing a discovery section: -Present what you've captured so far, then: -"Anything else you'd like to add, or shall we move on?" -``` +**The test for every instruction:** Would removing this cause the LLM to produce a worse outcome? If the LLM would do it anyway — or if it's just spelling out mechanical steps — cut it. -**Why it works:** Users almost always remember one more thing when given a graceful exit ramp rather than a hard stop. The low-pressure phrasing invites contribution without demanding it. This consistently produces richer, more complete artifacts than rigid section-by-section questioning. +### Outcome vs Prescriptive -**When to use:** Any guided workflow or agent with collaborative discovery — product briefs, requirements gathering, design reviews, brainstorming synthesis. Use at every natural transition between topics or sections. +| Prescriptive (avoid) | Outcome-based (prefer) | +| ----------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------ | +| "Step 1: Ask about goals. Step 2: Ask about constraints. Step 3: Summarize and confirm." | "Ensure the user's vision is fully captured — goals, constraints, and edge cases — before proceeding." | +| "Load config. Read user_name. Read communication_language. Greet the user by name in their language." | "Load available config and greet the user appropriately." | +| "Create a file. Write the header. Write section 1. Write section 2. Save." | "Produce a report covering X, Y, and Z." | -**When NOT to use:** Autonomous/headless execution, or steps where additional input would cause scope creep rather than enrich the output. +The prescriptive versions miss requirements the author didn't think of. 
The outcome-based versions let the LLM adapt to the actual situation. -### Intent-Before-Ingestion +### Why This Works -Never scan artifacts, documents, or project context until you understand WHY the user is here. Scanning without purpose produces noise, not signal. +- **Why over what** — When you explain why something matters, the LLM adapts to novel situations. When you just say what to do, it follows blindly even when it shouldn't. +- **Context enables judgment** — Give domain knowledge, constraints, and goals. The LLM figures out the approach. It's better at adapting to messy reality than any script you could write. +- **Prescriptive steps create brittleness** — When reality doesn't match the script, the LLM either follows the wrong script or gets confused. Outcomes let it adapt. +- **Every instruction should carry its weight** — If the LLM would do it anyway, the instruction is noise. If the LLM wouldn't know to do it without being told, that's signal. -```markdown -## On activation: -1. Greet and understand intent — what is this about? -2. Accept whatever inputs the user offers -3. Ask if they have additional documents or context -4. ONLY THEN scan artifacts, scoped to relevance -``` +### When Prescriptive Is Right -**Why it works:** Without knowing what the user wants, you can't judge what's relevant in a 100-page research doc vs a brainstorming report. Intent gives you the filter. Without it, scanning is a fool's errand. +Reserve exact steps for **fragile operations** where getting it wrong has consequences — script invocations, exact file paths, specific CLI commands, API calls with precise parameters. These need low freedom because there's one right way to do them. -**When to use:** Any agent that ingests documents, project context, or external data as part of its process. 
+| Freedom | When | Example | +| ------------------- | -------------------------------------------------- | ------------------------------------------------------------------- | +| **High** (outcomes) | Multiple valid approaches, LLM judgment adds value | "Ensure the user's requirements are complete" | +| **Medium** (guided) | Preferred approach exists, some variation OK | "Present findings in a structured report with an executive summary" | +| **Low** (exact) | Fragile, one right way, consequences for deviation | `python3 scripts/scan-path-standards.py {skill-path}` | -### Capture-Don't-Interrupt +## Patterns -When users provide information beyond the current scope (e.g., dropping requirements during a product brief, mentioning platforms during vision discovery), capture it silently for later use rather than redirecting or stopping them. +These are patterns that naturally emerge from outcome-based thinking. Apply them when they fit — they're not a checklist. -```markdown -## During discovery: -If user provides out-of-scope but valuable info: -- Capture it (notes, structured aside, addendum bucket) -- Don't interrupt their flow -- Use it later in the appropriate stage or output -``` +### Soft Gate Elicitation -**Why it works:** Users in creative flow will share their best insights unprompted. Interrupting to say "we'll cover that later" kills momentum and may lose the insight entirely. Capture everything, distill later. +At natural transitions, invite contribution without demanding it: "Anything else, or shall we move on?" Users almost always remember one more thing when given a graceful exit ramp. This produces richer artifacts than rigid section-by-section questioning. -**When to use:** Any collaborative discovery agent where the user is brainstorming, explaining, or brain-dumping. +### Intent-Before-Ingestion -### Dual-Output: Human Artifact + LLM Distillate +Understand why the user is here before scanning documents or project context. 
Intent gives you the relevance filter — without it, scanning is noise. -Any artifact-producing agent can output two complementary documents: a polished human-facing artifact AND a token-conscious, structured distillate optimized for downstream LLM consumption. +### Capture-Don't-Interrupt -```markdown -## Output strategy: -1. Primary: Human-facing document (exec summary, report, brief) -2. Optional: LLM distillate — dense, structured, token-efficient - - Captures overflow that doesn't belong in the human doc - - Rejected ideas (so downstream doesn't re-propose them) - - Detail bullets with just enough context to stand alone - - Designed to be loaded as context for the next workflow -``` +When users provide information beyond the current scope, capture it for later rather than redirecting. Users in creative flow share their best insights unprompted — interrupting loses them. -**Why it works:** Human docs are concise by design — they can't carry all the detail surfaced during discovery. But that detail has value for downstream LLM workflows (PRD creation, architecture design, etc.). The distillate bridges the gap without bloating the primary artifact. +### Dual-Output: Human Artifact + LLM Distillate -**When to use:** Any agent producing documents that feed into subsequent LLM workflows. The distillate is always optional — offered to the user, not forced. +Artifact-producing skills can output both a polished human-facing document and a token-efficient distillate for downstream LLM consumption. The distillate captures overflow, rejected ideas, and detail that doesn't belong in the human doc but has value for the next workflow. Always optional. ### Parallel Review Lenses -Before finalizing any artifact, fan out multiple reviewers with different perspectives to catch blind spots the builder/facilitator missed. - -```markdown -## Near completion: -Fan out 2-3 review subagents in parallel: -- Skeptic: "What's missing? What assumptions are untested?" 
-- Opportunity Spotter: "What adjacent value? What angles?" -- Contextual Reviewer: LLM picks the best third lens - (e.g., "regulatory risk" for healthtech, "DX critic" for devtools) +Before finalizing significant artifacts, fan out reviewers with different perspectives — skeptic, opportunity spotter, domain-specific lens. If subagents aren't available, do a single critical self-review pass. Multiple perspectives catch blind spots no single reviewer would. -Graceful degradation: If subagents unavailable, -main agent does a single critical self-review pass. -``` +### Three-Mode Architecture (Guided / Yolo / Headless) -**Why it works:** A single perspective — even an expert one — has blind spots. Multiple lenses surface issues and opportunities that no single reviewer would catch. The contextually-chosen third lens ensures domain-specific concerns aren't missed. +Consider whether the skill benefits from multiple execution modes: -**When to use:** Any agent producing a significant artifact (briefs, PRDs, designs, architecture docs). The review step is lightweight but high-value. +| Mode | When | Behavior | +| ------------ | ------------------- | ------------------------------------------------------------- | +| **Guided** | Default | Conversational discovery with soft gates | +| **Yolo** | "just draft it" | Ingest everything, draft complete artifact, then refine | +| **Headless** | `--headless` / `-H` | Complete the task without user input, using sensible defaults | -### Three-Mode Architecture (Guided / Yolo / Autonomous) - -For interactive agents, offer three execution modes that match different user contexts: - -| Mode | Trigger | Behavior | -|------|---------|----------| -| **Guided** | Default | Section-by-section with soft gates. Drafts from what it knows, questions what it doesn't. | -| **Yolo** | `--yolo` or "just draft it" | Ingests everything, drafts complete artifact upfront, then walks user through refinement. 
| -| **Autonomous** | `--headless` / `-H` | Headless. Takes inputs, produces artifact, no interaction. | - -**Why it works:** Not every user wants the same experience. A first-timer needs guided discovery. A repeat user with clear inputs wants yolo. A pipeline wants autonomous. Same agent, three entry points. - -**When to use:** Any facilitative agent that produces an artifact. Not all agents need all three — but considering them during design prevents painting yourself into a single interaction model. +Not all skills need all three. But considering them during design prevents locking into a single interaction model. ### Graceful Degradation -Every subagent-dependent feature should have a fallback path. If the platform doesn't support parallel subagents (or subagents at all), the workflow must still progress. - -```markdown -## Subagent-dependent step: -Try: Fan out subagents in parallel -Fallback: Main agent performs the work sequentially -Never: Block the workflow because a subagent feature is unavailable -``` - -**Why it works:** Skills run across different platforms, models, and configurations. A skill that hard-fails without subagents is fragile. A skill that gracefully falls back to sequential processing is robust everywhere. - -**When to use:** Any agent that uses subagents for research, review, or parallel processing. +Every subagent-dependent feature should have a fallback path. A skill that hard-fails without subagents is fragile — one that falls back to sequential processing works everywhere. ### Verifiable Intermediate Outputs -For complex tasks: plan → validate → execute → verify - -1. Analyze inputs -2. **Create** `changes.json` with planned updates -3. **Validate** with script before executing -4. Execute changes -5. Verify output - -Benefits: catches errors early, machine-verifiable, reversible planning. +For complex tasks with consequences: plan → validate → execute → verify. 
Create a verifiable plan before executing, validate with scripts where possible. Catches errors early and makes the work reversible. ## Writing Guidelines -- **Consistent terminology** — choose one term per concept, stick to it +- **Consistent terminology** — one term per concept, stick to it - **Third person** in descriptions — "Processes files" not "I help process files" - **Descriptive file names** — `form_validation_rules.md` not `doc2.md` - **Forward slashes** in all paths — cross-platform -- **One level deep** for reference files — SKILL.md → reference.md, never SKILL.md → A.md → B.md -- **TOC for long files** — add table of contents for files >100 lines +- **One level deep** for reference files — SKILL.md → reference.md, never chained (SKILL.md → A.md → B.md) +- **TOC for long files** — >100 lines ## Anti-Patterns -| Anti-Pattern | Fix | -|---|---| -| Too many options upfront | One default with escape hatch for edge cases | -| Deep reference nesting (A→B→C) | Keep references 1 level from SKILL.md | -| Inconsistent terminology | Choose one term per concept | -| Vague file names | Name by content, not sequence | -| Scripts that classify meaning via regex | Intelligence belongs in prompts, not scripts | +| Anti-Pattern | Fix | +| -------------------------------------------------- | ----------------------------------------------------- | +| Numbered steps for things the LLM would figure out | Describe the outcome and why it matters | +| Explaining how to load config (the mechanic) | List the config keys and their defaults (the outcome) | +| Prescribing exact greeting/menu format | "Greet the user and present capabilities" | +| Spelling out headless mode in detail | "If headless, complete without user input" | +| Too many options upfront | One default with escape hatch | +| Deep reference nesting (A→B→C) | Keep references 1 level from SKILL.md | +| Inconsistent terminology | Choose one term per concept | +| Scripts that classify meaning via regex | Intelligence belongs in prompts, not 
scripts | ## Scripts in Skills -- **Execute vs reference** — "Run `analyze.py` to extract fields" (execute) vs "See `analyze.py` for the algorithm" (read) +- **Execute vs reference** — "Run `analyze.py`" (execute) vs "See `analyze.py` for the algorithm" (read) - **Document constants** — explain why `TIMEOUT = 30`, not just what -- **PEP 723 for Python** — self-contained scripts with inline dependency declarations +- **PEP 723 for Python** — self-contained with inline dependency declarations - **MCP tools** — use fully qualified names: `ServerName:tool_name` diff --git a/plugins/bmad/skills/bmad-agent-builder/references/standard-fields.md b/plugins/bmad/skills/bmad-agent-builder/references/standard-fields.md index 52b64a5..6f493f6 100644 --- a/plugins/bmad/skills/bmad-agent-builder/references/standard-fields.md +++ b/plugins/bmad/skills/bmad-agent-builder/references/standard-fields.md @@ -1,20 +1,32 @@ # Standard Agent Fields -| Field | Description | Example | -|-------|-------------|---------| -| `name` | Full skill name | `bmad-agent-tech-writer`, `bmad-cis-agent-lila` | -| `skillName` | Functional name (kebab-case) | `tech-writer`, `lila` | -| `displayName` | Friendly name | `Paige`, `Lila`, `Floyd` | -| `title` | Role title | `Tech Writer`, `Holodeck Operator` | -| `icon` | Single emoji | `🔥`, `🌟` | -| `role` | Functional role | `Technical Documentation Specialist` | -| `sidecar` | Memory folder (optional) | `{skillName}-sidecar/` | +## Frontmatter Fields + +Only these fields go in the YAML frontmatter block: + +| Field | Description | Example | +| ------------- | ------------------------------------------------- | ----------------------------------------------- | +| `name` | Full skill name (kebab-case, same as folder name) | `bmad-agent-tech-writer`, `bmad-cis-agent-lila` | +| `description` | [What it does]. [Use when user says 'X' or 'Y'.] 
| See Description Format below | + +## Content Fields + +These are used within the SKILL.md body — never in frontmatter: + +| Field | Description | Example | +| ------------- | ---------------------------------------- | ------------------------------------ | +| `displayName` | Friendly name (title heading, greetings) | `Paige`, `Lila`, `Floyd` | +| `title` | Role title | `Tech Writer`, `Holodeck Operator` | +| `icon` | Single emoji | `🔥`, `🌟` | +| `role` | Functional role | `Technical Documentation Specialist` | +| `sidecar` | Memory folder (optional) | `{skillName}-sidecar/` | ## Overview Section Format The Overview is the first section after the title — it primes the AI for everything that follows. **3-part formula:** + 1. **What** — What this agent does 2. **How** — How it works (role, approach, modes) 3. **Why/Outcome** — Value delivered, quality standard @@ -22,16 +34,19 @@ The Overview is the first section after the title — it primes the AI for every **Templates by agent type:** **Companion agents:** + ```markdown This skill provides a {role} who helps users {primary outcome}. Act as {displayName} — {key quality}. With {key features}, {displayName} {primary value proposition}. ``` **Workflow agents:** + ```markdown This skill helps you {outcome} through {approach}. Act as {role}, guiding users through {key stages/phases}. Your output is {deliverable}. ``` **Utility agents:** + ```markdown This skill {what it does}. Use when {when to use}. Returns {output format} with {key feature}. ``` @@ -44,60 +59,33 @@ This skill {what it does}. Use when {when to use}. Returns {output format} with ## Path Rules -**Critical**: When prompts reference files in memory, always use full paths. 
- -### Memory Files (sidecar) - -Always use: `{project-root}/_bmad/_memory/{skillName}-sidecar/` +### Skill-Internal Files -Examples: -- `{project-root}/_bmad/_memory/journaling-companion-sidecar/index.md` -- `{project-root}/_bmad/_memory/journaling-companion-sidecar/access-boundaries.md` — **Required** -- `{project-root}/_bmad/_memory/journaling-companion-sidecar/autonomous-log.md` -- `{project-root}/_bmad/_memory/journaling-companion-sidecar/references/tags-reference.md` +All references to files within the skill use `./` relative paths: -### Access Boundaries (Standard for all agents) +- `./references/memory-system.md` +- `./references/some-guide.md` +- `./scripts/calculate-metrics.py` -Every agent must have an `access-boundaries.md` file in its sidecar memory: +This distinguishes skill-internal files from `{project-root}` paths — without the `./` prefix the LLM may confuse them. -**Load on every activation** — Before any file operations. - -**Structure:** -```markdown -# Access Boundaries for {displayName} - -## Read Access -- {folder-or-pattern} - -## Write Access -- {folder-or-pattern} - -## Deny Zones -- {forbidden-path} -``` +### Memory Files (sidecar) -**Purpose:** Define clear boundaries for what the agent can and cannot access, especially important for autonomous agents. +Always use `{project-root}` prefix: `{project-root}/_bmad/memory/{skillName}-sidecar/` -### User-Configured Locations +The sidecar `index.md` is the single entry point to the agent's memory system — it tells the agent what else to load (boundaries, logs, references, etc.). Load it once on activation; don't duplicate load instructions for individual memory files. -Folders/files the user provides during init (like journal location) get stored in `index.md`. Both interactive and autonomous modes: +### Project-Scope Paths -1. Load `index.md` first -2. Read the user's configured paths -3. 
Use those paths for operations +Use `{project-root}/...` for any path relative to the project root: -Example pattern: -```markdown -## Autonomous Mode +- `{project-root}/_bmad/planning/prd.md` +- `{project-root}/docs/report.md` -When run autonomously: -1. Load `{project-root}/_bmad/_memory/{skillName}-sidecar/index.md` to get user's journal location -2. Read entries from that location -3. Write results to `{project-root}/_bmad/_memory/{skillName}-sidecar/autonomous-log.md` -``` +### Config Variables -## CLI Usage (Autonomous Agents) +Use directly — they already contain `{project-root}` in their resolved values: -Agents with autonomous mode should include a `## CLI Usage` section documenting headless invocation: - -```markdown +- `{output_folder}/file.md` +- Correct: `{bmad_builder_output_folder}/agent.md` +- Wrong: `{project-root}/{bmad_builder_output_folder}/agent.md` (double-prefix) diff --git a/plugins/bmad/skills/bmad-agent-builder/references/template-substitution-rules.md b/plugins/bmad/skills/bmad-agent-builder/references/template-substitution-rules.md index b3bce15..0d90fa3 100644 --- a/plugins/bmad/skills/bmad-agent-builder/references/template-substitution-rules.md +++ b/plugins/bmad/skills/bmad-agent-builder/references/template-substitution-rules.md @@ -1,72 +1,47 @@ # Template Substitution Rules -When building the agent, you MUST apply these conditional blocks to the templates: +The SKILL-template provides a minimal skeleton: frontmatter, overview, agent identity sections, sidecar, and activation with config loading. Everything beyond that is crafted by the builder based on what was learned during discovery and requirements phases. -## For Module-Based Agents +## Frontmatter + +- `{module-code-or-empty}` → Module code prefix with hyphen (e.g., `cis-`) or empty for standalone +- `{agent-name}` → Agent functional name (kebab-case) +- `{skill-description}` → Two parts: [4-6 word summary]. 
[trigger phrases] +- `{displayName}` → Friendly display name +- `{skillName}` → Full skill name with module prefix + +## Module Conditionals + +### For Module-Based Agents - `{if-module}` ... `{/if-module}` → Keep the content inside - `{if-standalone}` ... `{/if-standalone}` → Remove the entire block including markers -- `{custom-config-properties}` → Replace with comma-separated custom property names (e.g., `journal_folder, adventure_logs_folder`) or remove line if none -- `{module-code-or-empty}` → Replace with module code (e.g., `cis-`) or empty string for standalone +- `{module-code}` → Module code without trailing hyphen (e.g., `cis`) +- `{module-setup-skill}` → Name of the module's setup skill (e.g., `bmad-cis-setup`) -## For Standalone Agents +### For Standalone Agents - `{if-module}` ... `{/if-module}` → Remove the entire block including markers - `{if-standalone}` ... `{/if-standalone}` → Keep the content inside -- `{custom-config-properties}` → Remove (not used for standalone) -- `{module-code-or-empty}` → Empty string -- `{custom-init-questions}` → Add user's additional questions here (remove placeholder if none) - -## For Agents With Sidecar (Memory) -- `{if-sidecar}` ... `{/if-sidecar}` → Keep the content inside -- `{if-no-sidecar}` ... `{/if-no-sidecar}` → Remove the entire block including markers +## Sidecar Conditionals -## For Agents Without Sidecar +- `{if-sidecar}` ... `{/if-sidecar}` → Keep if agent has persistent memory, otherwise remove +- `{if-no-sidecar}` ... `{/if-no-sidecar}` → Keep if agent has no persistent memory, otherwise remove -- `{if-sidecar}` ... `{/if-sidecar}` → Remove the entire block including markers -- `{if-no-sidecar}` ... `{/if-no-sidecar}` → Keep the content inside +## Headless Conditional -## External Skills +- `{if-headless}` ... `{/if-headless}` → Keep if agent supports headless mode, otherwise remove -- `{if-external-skills}` ... 
`{/if-external-skills}` → Keep if agent uses external skills, otherwise remove entire block -- `{external-skills-list}` → Replace with bulleted list of exact skill names: - ```markdown - - `bmad-skill-name-one` — Description - - `bmad-skill-name-two` — Description - ``` +## Beyond the Template -## Custom Init Questions - -Add user's additional questions to the init.md template, replacing `{custom-init-questions}` placeholder. Remove the placeholder line if no custom questions. +The builder determines the rest of the agent structure — capabilities, activation flow, sidecar initialization, capability routing, external skills, scripts — based on the agent's requirements. The template intentionally does not prescribe these. ## Path References -All generated agents use these paths: -- `init.md` — First-run setup -- `{name}.md` — Individual capability prompts -- `references/memory-system.md` — Memory discipline (if sidecar needed) -- `bmad-manifest.json` — Capabilities and metadata with menu codes -- `scripts/` — Python/shell scripts for deterministic operations (if needed) - -## Frontmatter Placeholders - -Replace all frontmatter placeholders in SKILL-template.md: -- `{module-code-or-empty}` → Module code (e.g., `cis-`) or empty -- `{agent-name}` → Agent functional name (kebab-case) -- `{short phrase what agent does}` → One-line description -- `{displayName}` → Friendly name -- `{title}` → Role title -- `{role}` → Functional role -- `{skillName}` → Full skill name with module prefix -- `{user_name}` → From config -- `{communication_language}` → From config - -## Content Placeholders +All generated agents use `./` prefix for skill-internal paths: -Replace all content placeholders with agent-specific values: -- `{overview-template}` → Overview paragraph (2-3 sentences) following the 3-part formula (What, How, Why/Outcome) -- `{One-sentence identity.}` → Brief identity statement -- `{Who is this agent? 
One clear sentence.}` → Identity description -- `{How does this agent communicate? Be specific with examples.}` → Communication style -- `{Guiding principle 1/2/3}` → Agent's principles +- `./references/init.md` — First-run onboarding (if sidecar) +- `./references/{capability}.md` — Individual capability prompts +- `./references/memory-system.md` — Memory discipline (if sidecar) +- `./scripts/` — Python/shell scripts for deterministic operations diff --git a/plugins/bmad/skills/bmad-agent-builder/references/universal-scan-schema.md b/plugins/bmad/skills/bmad-agent-builder/references/universal-scan-schema.md deleted file mode 100644 index 11e6df8..0000000 --- a/plugins/bmad/skills/bmad-agent-builder/references/universal-scan-schema.md +++ /dev/null @@ -1,267 +0,0 @@ -# Universal Scanner Output Schema - -All quality scanners — both LLM-based and deterministic lint scripts — MUST produce output conforming to this schema. No exceptions. - -## Top-Level Structure - -```json -{ - "scanner": "scanner-name", - "skill_path": "{path}", - "findings": [], - "assessments": {}, - "summary": { - "total_findings": 0, - "by_severity": {}, - "assessment": "1-2 sentence overall assessment" - } -} -``` - -| Key | Type | Required | Description | -|-----|------|----------|-------------| -| `scanner` | string | yes | Scanner identifier (e.g., `"workflow-integrity"`, `"prompt-craft"`) | -| `skill_path` | string | yes | Absolute path to the skill being scanned | -| `findings` | array | yes | ALL items — issues, strengths, suggestions, opportunities. Always an array, never an object | -| `assessments` | object | yes | Scanner-specific structured analysis (cohesion tables, health metrics, user journeys, etc.). 
Free-form per scanner | -| `summary` | object | yes | Aggregate counts and brief overall assessment | - -## Finding Schema (7 fields) - -Every item in `findings[]` has exactly these 7 fields: - -```json -{ - "file": "SKILL.md", - "line": 42, - "severity": "high", - "category": "frontmatter", - "title": "Brief headline of the finding", - "detail": "Full context — rationale, what was observed, why it matters", - "action": "What to do about it — fix, suggestion, or script to create" -} -``` - -| Field | Type | Required | Description | -|-------|------|----------|-------------| -| `file` | string | yes | Relative path to the affected file (e.g., `"SKILL.md"`, `"scripts/build.py"`). Empty string if not file-specific | -| `line` | int\|null | no | Line number (1-based). `null` or `0` if not line-specific | -| `severity` | string | yes | One of the severity values below | -| `category` | string | yes | Scanner-specific category (e.g., `"frontmatter"`, `"token-waste"`, `"lint"`) | -| `title` | string | yes | Brief headline (1 sentence). This is the primary display text | -| `detail` | string | yes | Full context — fold rationale, observation, impact, nuance into one narrative. Empty string if title is self-explanatory | -| `action` | string | yes | What to do — fix instruction, suggestion, or script to create. 
Empty string for strengths/notes | - -## Severity Values (complete enum) - -``` -critical | high | medium | low | high-opportunity | medium-opportunity | low-opportunity | suggestion | strength | note -``` - -**Routing rules:** -- `critical`, `high` → "Truly Broken" section in report -- `medium`, `low` → category-specific findings sections -- `high-opportunity`, `medium-opportunity`, `low-opportunity` → enhancement/creative sections -- `suggestion` → creative suggestions section -- `strength` → strengths section (positive observations worth preserving) -- `note` → informational observations, also routed to strengths - -## Assessment Sub-Structure Contracts - -The `assessments` object is free-form per scanner, but the HTML report renderer expects specific shapes for specific keys. These are the canonical formats. - -### user_journeys (enhancement-opportunities scanner) - -**Always an array of objects. Never an object keyed by persona.** - -```json -"user_journeys": [ - { - "archetype": "first-timer", - "summary": "Brief narrative of this user's experience", - "friction_points": ["moment 1", "moment 2"], - "bright_spots": ["what works well"] - } -] -``` - -### autonomous_assessment (enhancement-opportunities scanner) - -```json -"autonomous_assessment": { - "potential": "headless-ready|easily-adaptable|partially-adaptable|fundamentally-interactive", - "hitl_points": 3, - "auto_resolvable": 2, - "needs_input": 1, - "notes": "Brief assessment" -} -``` - -### top_insights (enhancement-opportunities scanner) - -**Always an array of objects with title/detail/action (same shape as findings but without file/line/severity/category).** - -```json -"top_insights": [ - { - "title": "The key observation", - "detail": "Why it matters", - "action": "What to do about it" - } -] -``` - -### cohesion_analysis (skill-cohesion / agent-cohesion scanner) - -```json -"cohesion_analysis": { - "dimension_name": { "score": "strong|moderate|weak", "notes": "explanation" } -} -``` - -Dimension 
names are scanner-specific (e.g., `stage_flow_coherence`, `persona_alignment`). The report renderer iterates all keys and renders a table row per dimension. - -### skill_identity / agent_identity (cohesion scanners) - -```json -"skill_identity": { - "name": "skill-name", - "purpose_summary": "Brief characterization", - "primary_outcome": "What this skill produces" -} -``` - -### skillmd_assessment (prompt-craft scanner) - -```json -"skillmd_assessment": { - "overview_quality": "appropriate|excessive|missing", - "progressive_disclosure": "good|needs-extraction|monolithic", - "notes": "brief assessment" -} -``` - -Agent variant adds `"persona_context": "appropriate|excessive|missing"`. - -### prompt_health (prompt-craft scanner) - -```json -"prompt_health": { - "total_prompts": 3, - "with_config_header": 2, - "with_progression": 1, - "self_contained": 3 -} -``` - -### skill_understanding (enhancement-opportunities scanner) - -```json -"skill_understanding": { - "purpose": "what this skill does", - "primary_user": "who it's for", - "assumptions": ["assumption 1", "assumption 2"] -} -``` - -### stage_summary (workflow-integrity scanner) - -```json -"stage_summary": { - "total_stages": 0, - "missing_stages": [], - "orphaned_stages": [], - "stages_without_progression": [], - "stages_without_config_header": [] -} -``` - -### metadata (structure scanner) - -Free-form key-value pairs. Rendered as a metadata block. - -### script_summary (scripts lint) - -```json -"script_summary": { - "total_scripts": 5, - "by_type": {"python": 3, "shell": 2}, - "missing_tests": ["script1.py"] -} -``` - -### existing_scripts (script-opportunities scanner) - -Array of strings (script paths that already exist). 
- -## Complete Example - -```json -{ - "scanner": "workflow-integrity", - "skill_path": "/path/to/skill", - "findings": [ - { - "file": "SKILL.md", - "line": 12, - "severity": "high", - "category": "frontmatter", - "title": "Missing required 'version' field in frontmatter", - "detail": "The SKILL.md frontmatter is missing the version field. This prevents the manifest generator from producing correct output and breaks version-aware consumers.", - "action": "Add 'version: 1.0.0' to the YAML frontmatter block" - }, - { - "file": "build-process.md", - "line": null, - "severity": "strength", - "category": "design", - "title": "Excellent progressive disclosure pattern in build stages", - "detail": "Each stage provides exactly the context needed without front-loading information. This reduces token waste and improves LLM comprehension.", - "action": "" - }, - { - "file": "SKILL.md", - "line": 45, - "severity": "medium-opportunity", - "category": "experience-gap", - "title": "No guidance for first-time users unfamiliar with build workflows", - "detail": "A user encountering this skill for the first time has no onboarding path. The skill assumes familiarity with stage-based workflows, which creates friction for newcomers.", - "action": "Add a 'Getting Started' section or link to onboarding documentation" - } - ], - "assessments": { - "stage_summary": { - "total_stages": 7, - "missing_stages": [], - "orphaned_stages": ["cleanup"] - } - }, - "summary": { - "total_findings": 3, - "by_severity": {"high": 1, "medium-opportunity": 1, "strength": 1}, - "assessment": "Well-structured skill with one critical frontmatter gap. Progressive disclosure is a notable strength." - } -} -``` - -## DO NOT - -- **DO NOT** rename fields. Use exactly: `file`, `line`, `severity`, `category`, `title`, `detail`, `action` -- **DO NOT** use `issues` instead of `findings` — the array is always called `findings` -- **DO NOT** add fields to findings beyond the 7 defined above. 
Put scanner-specific structured data in `assessments` -- **DO NOT** use separate arrays for strengths, suggestions, or opportunities — they go in `findings` with appropriate severity values -- **DO NOT** change `user_journeys` from an array to an object keyed by persona name -- **DO NOT** restructure assessment sub-objects — use the shapes defined above -- **DO NOT** put free-form narrative data into `assessments` — that belongs in `detail` fields of findings or in `summary.assessment` - -## Self-Check Before Output - -Before writing your JSON output, verify: - -1. Is your array called `findings` (not `issues`, not `opportunities`)? -2. Does every item in `findings` have all 7 fields: `file`, `line`, `severity`, `category`, `title`, `detail`, `action`? -3. Are strengths in `findings` with `severity: "strength"` (not in a separate `strengths` array)? -4. Are suggestions in `findings` with `severity: "suggestion"` (not in a separate `creative_suggestions` array)? -5. Is `assessments` an object containing structured analysis data (not items that belong in findings)? -6. Is `user_journeys` an array of objects (not an object keyed by persona)? -7. Do `top_insights` items use `title`/`detail`/`action` (not `insight`/`suggestion`/`why_it_matters`)? diff --git a/plugins/bmad/skills/bmad-agent-builder/report-quality-scan-creator.md b/plugins/bmad/skills/bmad-agent-builder/report-quality-scan-creator.md index 3a0376e..b9d02e2 100644 --- a/plugins/bmad/skills/bmad-agent-builder/report-quality-scan-creator.md +++ b/plugins/bmad/skills/bmad-agent-builder/report-quality-scan-creator.md @@ -1,138 +1,286 @@ -# Quality Scan Report Creator +# BMad Method · Quality Analysis Report Creator -You are a master quality engineer tech writer agent QualityReportBot-9001. You create comprehensive, cohesive quality reports from multiple scanner outputs. 
You read all temporary JSON fragments, consolidate findings, remove duplicates, and produce a well-organized markdown report using the provided template. You are quality obsessed — nothing gets dropped. You will never attempt to fix anything — you are a writer, not a fixer. +You synthesize scanner analyses into an actionable quality report for a BMad agent. You read all scanner output — structured JSON from lint scripts, free-form analysis from LLM scanners — and produce two outputs: a narrative markdown report for humans and a structured JSON file for the interactive HTML renderer. + +Your job is **synthesis, not transcription.** Don't list findings by scanner. Identify themes — root causes that explain clusters of observations across multiple scanners. Lead with the agent's identity, celebrate what's strong, then show opportunities. ## Inputs -- `{skill-path}` — Path to the agent being validated -- `{quality-report-dir}` — Directory containing scanner temp files AND where to write the final report +- `{skill-path}` — Path to the agent being analyzed +- `{quality-report-dir}` — Directory containing all scanner output AND where to write your reports -## Template +## Process -Read `assets/quality-report-template.md` for the report structure. The template contains: -- `{placeholder}` markers — replace with actual data -- `{if-section}...{/if-section}` blocks — include only when data exists, omit entirely when empty -- `` — inline guidance for what data to pull and from where; strip from final output +### Step 1: Read Everything -## Process +Read all files in `{quality-report-dir}`: + +- `*-temp.json` — Lint script output (structured JSON with findings arrays) +- `*-prepass.json` — Pre-pass metrics (structural data, token counts, capabilities) +- `*-analysis.md` — LLM scanner analyses (free-form markdown) + +Also read the agent's `SKILL.md` to extract: name, icon, title, identity, communication style, principles, and the capability routing table. 
+ +### Step 2: Build the Agent Portrait + +From the agent's SKILL.md, synthesize a 2-3 sentence portrait that captures who this agent is — their personality, expertise, and voice. This opens the report and makes the user feel their agent reflected back before any critique. Include the agent's icon, display name, and title. + +### Step 3: Build the Capability Dashboard + +From the routing table in SKILL.md, list every capability. Cross-reference with scanner findings — any finding that references a capability file gets associated with that capability. Rate each: + +- **Good** — no findings or only low/note severity +- **Needs attention** — medium+ findings referencing this capability + +This dashboard shows the user the breadth of what they built and directs attention where it's needed. + +### Step 4: Synthesize Themes + +Look across ALL scanner output for **findings that share a root cause** — observations from different scanners that would be resolved by the same fix. + +Ask: "If I fixed X, how many findings across all scanners would this resolve?" + +Group related findings into 3-5 themes. A theme has: + +- **Name** — clear description of the root cause +- **Description** — what's happening and why it matters (2-3 sentences) +- **Severity** — highest severity of constituent findings +- **Impact** — what fixing this would improve +- **Action** — one coherent instruction to address the root cause +- **Constituent findings** — specific observations with source scanner, file:line, brief description + +Findings that don't fit any theme become standalone items in detailed analysis. + +### Step 5: Assess Overall Quality + +- **Grade:** Excellent / Good / Fair / Poor (based on severity distribution) +- **Narrative:** 2-3 sentences capturing the agent's primary strength and primary opportunity + +### Step 6: Collect Strengths + +Gather strengths from all scanners. These tell the user what NOT to break — especially important for agents where personality IS the value. 
+ +### Step 7: Organize Detailed Analysis + +For each analysis dimension, summarize the scanner's assessment and list findings not covered by themes: -### Step 1: Ingest Everything +- **Structure & Capabilities** — from structure scanner +- **Persona & Voice** — from prompt-craft scanner (agent-specific framing) +- **Identity Cohesion** — from agent-cohesion scanner +- **Execution Efficiency** — from execution-efficiency scanner +- **Conversation Experience** — from enhancement-opportunities scanner (journeys, headless, edge cases) +- **Script Opportunities** — from script-opportunities scanner -1. Read `assets/quality-report-template.md` -2. List ALL files in `{quality-report-dir}` — both `*-temp.json` (scanner findings) and `*-prepass.json` (structural metrics) -3. Read EVERY JSON file +### Step 8: Rank Recommendations -### Step 2: Extract All Data Types +Order by impact — "how many findings does fixing this resolve?" The fix that clears 9 findings ranks above the fix that clears 1. -All scanners now use the universal schema defined in `references/universal-scan-schema.md`. Scanner-specific data lives in `assessments{}`, not as top-level keys. +## Write Two Files -For each scanner file, extract not just `findings` arrays but ALL of these data types: +### 1. 
quality-report.md -| Data Type | Where It Lives | Report Destination | -|-----------|---------------|-------------------| -| Issues/findings (severity: critical-low) | All scanner `findings[]` | Detailed Findings by Category | -| Strengths (severity: "strength"/"note", category: "strength") | All scanners: findings where severity="strength" | Strengths section | -| Agent identity | agent-cohesion `assessments.agent_identity` | Agent Identity section + Executive Summary | -| Cohesion dimensional analysis | agent-cohesion `assessments.cohesion_analysis` | Cohesion Analysis table | -| Consolidation opportunities | agent-cohesion `assessments.cohesion_analysis.redundancy_level.consolidation_opportunities` | Consolidation Opportunities in Cohesion | -| Creative suggestions | `findings[]` with severity="suggestion" (no separate creative_suggestions array) | Creative Suggestions in Cohesion section | -| Craft & agent assessment | prompt-craft `assessments.skillmd_assessment` (incl. `persona_context`), `assessments.prompt_health`, `summary.assessment` | Prompt Craft section header + Executive Summary | -| Structure metadata | structure `assessments.metadata` (has_memory, has_headless, manifest_valid, etc.) 
| Structure & Capabilities section header | -| User journeys | enhancement-opportunities `assessments.user_journeys[]` | User Journeys section | -| Autonomous assessment | enhancement-opportunities `assessments.autonomous_assessment` | Autonomous Readiness section | -| Skill understanding | enhancement-opportunities `assessments.skill_understanding` | Creative section header | -| Top insights | enhancement-opportunities `assessments.top_insights[]` | Top Insights in Creative section | -| Optimization opportunities | `findings[]` with severity ending in "-opportunity" (no separate opportunities array) | Optimization Opportunities in Efficiency section | -| Script inventory & token savings | scripts `assessments.script_summary`, script-opportunities `summary` | Scripts sections | -| Prepass metrics | `*-prepass.json` files | Context data points where useful | +```markdown +# BMad Method · Quality Analysis: {agent-name} -### Step 3: Populate Template +**{icon} {display-name}** — {title} +**Analyzed:** {timestamp} | **Path:** {skill-path} +**Interactive report:** quality-report.html -Fill the template section by section, following the `` guidance in each. Key rules: +## Agent Portrait -- **Conditional sections:** Only include `{if-...}` blocks when the data exists. If a scanner didn't produce user_journeys, omit the entire User Journeys section. -- **Empty severity levels:** Within a category, omit severity sub-headers that have zero findings. -- **Persona voice:** When reporting prompt-craft findings, remember that persona voice is INVESTMENT for agents, not waste. Reflect the scanner's nuance field if present. -- **Strip comments:** Remove all `` blocks from final output. +{synthesized 2-3 sentence portrait} -### Step 4: Deduplicate +## Capabilities -- **Same issue, two scanners:** Keep ONE entry, cite both sources. Use the more detailed description. -- **Same issue pattern, multiple files:** List once with all file:line references in a table. 
-- **Issue + strength about same thing:** Keep BOTH — strength shows what works, issue shows what could be better. -- **Overlapping creative suggestions:** Merge into the richer description. -- **Routing:** "note"/"strength" severity → Strengths section. "suggestion" severity → Creative subsection. Do not mix these into issue lists. +| Capability | Status | Observations | +| ---------- | ---------------------- | ------------ | +| {name} | Good / Needs attention | {count or —} | -### Step 5: Verification Pass +## Assessment -**This step is mandatory.** After populating the report, re-read every temp file and verify against this checklist: +**{Grade}** — {narrative} -- [ ] Every finding from every `*-temp.json` findings[] array -- [ ] Agent identity block (persona_summary, primary_purpose, capability_count) -- [ ] All findings with severity="strength" from any scanner -- [ ] All positive notes from prompt-craft (severity="note") -- [ ] Cohesion analysis dimensional scores table (if present) -- [ ] Consolidation opportunities from cohesion redundancy analysis -- [ ] Craft assessment, skill type assessment, and persona context assessment -- [ ] Structure metadata (sections_found, has_memory, has_headless, manifest_valid) -- [ ] ALL user journeys with ALL friction_points and bright_spots per archetype -- [ ] The autonomous_assessment block (all fields) -- [ ] All findings with severity="suggestion" from cohesion scanners -- [ ] All findings with severity ending in "-opportunity" from execution-efficiency -- [ ] assessments.top_insights from enhancement-opportunities -- [ ] Script inventory and token savings from script-opportunities -- [ ] Skill understanding (purpose, primary_user, key_assumptions) -- [ ] Prompt health summary from prompt-craft (if prompts exist) +## What's Broken -If any item was dropped, add it to the appropriate section before writing. 
+{Only if critical/high issues exist} -### Step 6: Write and Return +## Opportunities -Write report to: `{quality-report-dir}/quality-report.md` +### 1. {Theme Name} ({severity} — {N} observations) -Return JSON: +{Description + Fix + constituent findings} + +## Strengths + +{What this agent does well} + +## Detailed Analysis + +### Structure & Capabilities + +### Persona & Voice + +### Identity Cohesion + +### Execution Efficiency + +### Conversation Experience + +### Script Opportunities + +## Recommendations + +1. {Highest impact} +2. ... +``` + +### 2. report-data.json + +**CRITICAL: This file is consumed by a deterministic Python script. Use EXACTLY the field names shown below. Do not rename, restructure, or omit any required fields. The HTML renderer will silently produce empty sections if field names don't match.** + +Every `"..."` below is a placeholder for your content. Replace with actual values. Arrays may be empty `[]` but must exist. ```json { - "report_file": "{full-path-to-report}", - "summary": { - "total_issues": 0, - "critical": 0, - "high": 0, - "medium": 0, - "low": 0, - "strengths_count": 0, - "enhancements_count": 0, - "user_journeys_count": 0, - "overall_quality": "Excellent|Good|Fair|Poor", - "overall_cohesion": "cohesive|mostly-cohesive|fragmented|confused", - "craft_assessment": "brief summary from prompt-craft", - "truly_broken_found": true, - "truly_broken_count": 0 + "meta": { + "skill_name": "the-agent-name", + "skill_path": "/full/path/to/agent", + "timestamp": "2026-03-26T23:03:03Z", + "scanner_count": 8, + "type": "agent" + }, + "agent_profile": { + "icon": "emoji icon from agent's SKILL.md", + "display_name": "Agent's display name", + "title": "Agent's title/role", + "portrait": "Synthesized 2-3 sentence personality portrait" }, - "by_category": { - "structure_capabilities": {"critical": 0, "high": 0, "medium": 0, "low": 0}, - "prompt_craft": {"critical": 0, "high": 0, "medium": 0, "low": 0}, - "execution_efficiency": {"critical": 
0, "high": 0, "medium": 0, "low": 0}, - "path_script_standards": {"critical": 0, "high": 0, "medium": 0, "low": 0}, - "agent_cohesion": {"critical": 0, "high": 0, "medium": 0, "low": 0}, - "creative": {"high_opportunity": 0, "medium_opportunity": 0, "low_opportunity": 0} + "capabilities": [ + { + "name": "Capability display name", + "file": "references/capability-file.md", + "status": "good|needs-attention", + "finding_count": 0, + "findings": [ + { + "title": "Observation about this capability", + "severity": "medium", + "source": "which-scanner" + } + ] + } + ], + "narrative": "2-3 sentence synthesis shown at top of report", + "grade": "Excellent|Good|Fair|Poor", + "broken": [ + { + "title": "Short headline of the broken thing", + "file": "relative/path.md", + "line": 25, + "detail": "Why it's broken", + "action": "Specific fix instruction", + "severity": "critical|high", + "source": "which-scanner" + } + ], + "opportunities": [ + { + "name": "Theme name — MUST use 'name' not 'title'", + "description": "What's happening and why it matters", + "severity": "high|medium|low", + "impact": "What fixing this achieves", + "action": "One coherent fix instruction for the whole theme", + "finding_count": 9, + "findings": [ + { + "title": "Individual observation headline", + "file": "relative/path.md", + "line": 42, + "detail": "What was observed", + "source": "which-scanner" + } + ] + } + ], + "strengths": [ + { + "title": "What's strong — MUST be an object with 'title', not a plain string", + "detail": "Why it matters and should be preserved" + } + ], + "detailed_analysis": { + "structure": { + "assessment": "1-3 sentence summary", + "findings": [] + }, + "persona": { + "assessment": "1-3 sentence summary", + "overview_quality": "appropriate|excessive|missing", + "findings": [] + }, + "cohesion": { + "assessment": "1-3 sentence summary", + "dimensions": { + "persona_capability_alignment": { "score": "strong|moderate|weak", "notes": "explanation" } + }, + "findings": [] + 
}, + "efficiency": { + "assessment": "1-3 sentence summary", + "findings": [] + }, + "experience": { + "assessment": "1-3 sentence summary", + "journeys": [ + { + "archetype": "first-timer|expert|confused|edge-case|hostile-environment|automator", + "summary": "Brief narrative of this user's experience", + "friction_points": ["moment where user struggles"], + "bright_spots": ["moment where agent shines"] + } + ], + "autonomous": { + "potential": "headless-ready|easily-adaptable|partially-adaptable|fundamentally-interactive", + "notes": "Brief assessment" + }, + "findings": [] + }, + "scripts": { + "assessment": "1-3 sentence summary", + "token_savings": "estimated total", + "findings": [] + } }, - "high_impact_quick_wins": [ - {"issue": "description", "file": "location", "effort": "low"} + "recommendations": [ + { + "rank": 1, + "action": "What to do — MUST use 'action' not 'description'", + "resolves": 9, + "effort": "low|medium|high" + } ] } ``` -## Scanner Reference - -| Scanner | Temp File | Primary Category | -|---------|-----------|-----------------| -| structure | structure-temp.json | Structure & Capabilities | -| prompt-craft | prompt-craft-temp.json | Prompt Craft | -| execution-efficiency | execution-efficiency-temp.json | Execution Efficiency | -| path-standards | path-standards-temp.json | Path & Script Standards | -| scripts | scripts-temp.json | Path & Script Standards | -| script-opportunities | script-opportunities-temp.json | Script Opportunities | -| agent-cohesion | agent-cohesion-temp.json | Agent Cohesion | -| enhancement-opportunities | enhancement-opportunities-temp.json | Creative | +**Self-check before writing report-data.json:** + +1. Is `meta.skill_name` present (not `meta.skill` or `meta.name`)? +2. Is `meta.scanner_count` a number (not an array)? +3. Does `agent_profile` have all 4 fields: `icon`, `display_name`, `title`, `portrait`? +4. Is every strength an object `{"title": "...", "detail": "..."}` (not a plain string)? +5. 
Does every opportunity use `name` (not `title`) and include `finding_count` and `findings` array? +6. Does every recommendation use `action` (not `description`) and include `rank` number? +7. Does every capability include `name`, `file`, `status`, `finding_count`, `findings`? +8. Are detailed_analysis keys exactly: `structure`, `persona`, `cohesion`, `efficiency`, `experience`, `scripts`? +9. Does every journey use `archetype` (not `persona`), `summary` (not `friction`), `friction_points` array, `bright_spots` array? +10. Does `autonomous` use `potential` and `notes`? + +Write both files to `{quality-report-dir}/`. + +## Return + +Return only the path to `report-data.json` when complete. + +## Key Principle + +You are the synthesis layer. Scanners analyze through individual lenses. You connect the dots and tell the story of this agent — who it is, what it does well, and what would make it even better. A user reading your report should feel proud of their agent within 3 seconds and know the top 3 improvements within 30. diff --git a/plugins/bmad/skills/bmad-agent-builder/scripts/bmad-manifest-schema.json b/plugins/bmad/skills/bmad-agent-builder/scripts/bmad-manifest-schema.json deleted file mode 100644 index ea674b5..0000000 --- a/plugins/bmad/skills/bmad-agent-builder/scripts/bmad-manifest-schema.json +++ /dev/null @@ -1,103 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "BMad Manifest Schema", - "description": "Unified schema for all BMad skill manifest files (agents, workflows, skills)", - - "type": "object", - - "properties": { - "$schema": { - "description": "JSON Schema identifier", - "type": "string" - }, - - "module-code": { - "description": "Short code for the module this skill belongs to (e.g., bmb, cis). Omit for standalone skills.", - "type": "string", - "pattern": "^[a-z][a-z0-9-]*$" - }, - - "replaces-skill": { - "description": "Registered name of the BMad skill this replaces. 
Inherits metadata during bmad-init.", - "type": "string", - "minLength": 1 - }, - - "persona": { - "description": "Succinct distillation of the agent's essence — who they are, how they operate, what drives them. Presence of this field indicates the skill is an agent. Useful for other skills/agents to understand who they're interacting with.", - "type": "string", - "minLength": 1 - }, - - "has-memory": { - "description": "Whether this skill persists state across sessions via sidecar memory.", - "type": "boolean" - }, - - "capabilities": { - "description": "What this skill can do. Every skill has at least one capability.", - "type": "array", - "minItems": 1, - "items": { - "type": "object", - "properties": { - "name": { - "description": "Capability identifier (kebab-case)", - "type": "string", - "pattern": "^[a-z][a-z0-9-]*$" - }, - "menu-code": { - "description": "2-3 uppercase letter shortcut for interactive menus", - "type": "string", - "pattern": "^[A-Z]{2,3}$" - }, - "description": { - "description": "What this capability does and when to suggest it", - "type": "string" - }, - "supports-headless": { - "description": "Whether this capability can run without user interaction", - "type": "boolean" - }, - - "prompt": { - "description": "Relative path to the prompt file for internal capabilities (e.g., build-process.md). Omit if handled by SKILL.md directly or if this is an external skill call.", - "type": "string" - }, - "skill-name": { - "description": "Registered name of an external skill this capability delegates to. Omit for internal capabilities.", - "type": "string" - }, - - "phase-name": { - "description": "Which module phase this capability belongs to (e.g., planning, design, anytime). For module sequencing.", - "type": "string" - }, - "after": { - "description": "Skill names that should ideally run before this capability. 
If is-required is true on those skills, they block this one.", - "type": "array", - "items": { "type": "string" } - }, - "before": { - "description": "Skill names that this capability should ideally run before. Helps the module sequencer understand ordering.", - "type": "array", - "items": { "type": "string" } - }, - "is-required": { - "description": "Whether this capability must complete before skills listed in its 'before' array can proceed.", - "type": "boolean" - }, - "output-location": { - "description": "Where this capability writes its output. May contain config variables (e.g., {bmad_builder_output_folder}/agents/).", - "type": "string" - } - }, - "required": ["name", "menu-code", "description"], - "additionalProperties": false - } - } - }, - - "required": ["capabilities"], - "additionalProperties": false -} diff --git a/plugins/bmad/skills/bmad-agent-builder/scripts/generate-html-report.py b/plugins/bmad/skills/bmad-agent-builder/scripts/generate-html-report.py index a8614db..6e71d09 100644 --- a/plugins/bmad/skills/bmad-agent-builder/scripts/generate-html-report.py +++ b/plugins/bmad/skills/bmad-agent-builder/scripts/generate-html-report.py @@ -4,22 +4,18 @@ #!/usr/bin/env python3 """ -Generate an interactive HTML quality report from scanner temp JSON files. +Generate an interactive HTML quality analysis report for a BMad agent. 
-Reads all *-temp.json and *-prepass.json files from a quality scan output -directory, normalizes findings into a unified data model, and produces a +Reads report-data.json produced by the report creator and renders a self-contained HTML report with: - - Collapsible sections with severity filter badges - - Per-item copy-prompt buttons - - Multi-select batch prompt generator - - Executive summary with severity counts + - BMad Method branding + - Agent portrait (icon, name, title, personality description) + - Capability dashboard with expandable per-capability findings + - Opportunity themes with "Fix This Theme" prompt generation + - Expandable strengths and detailed analysis Usage: - python3 generate-html-report.py {quality-report-dir} [--open] [--skill-path /path/to/skill] - -The --skill-path is embedded in the prompt context so generated prompts -reference the correct location. If omitted, it is read from the first -temp JSON that contains a skill_path field. + python3 generate-html-report.py {quality-report-dir} [--open] """ from __future__ import annotations @@ -29,501 +25,32 @@ import platform import subprocess import sys -from datetime import datetime, timezone from pathlib import Path -# ============================================================================= -# Normalization — diverse scanner JSONs → unified item model -# ============================================================================= - -SEVERITY_RANK = { - 'critical': 0, 'high': 1, 'medium': 2, 'low': 3, - 'high-opportunity': 1, 'medium-opportunity': 2, 'low-opportunity': 3, - 'note': 4, 'strength': 5, 'suggestion': 4, 'info': 5, -} - -# Map scanner names to report sections -SCANNER_SECTIONS = { - 'workflow-integrity': 'structural', - 'structure': 'structure-capabilities', - 'prompt-craft': 'prompt-craft', - 'execution-efficiency': 'efficiency', - 'skill-cohesion': 'cohesion', - 'agent-cohesion': 'cohesion', - 'path-standards': 'quality', - 'scripts': 'scripts', - 'script-opportunities': 
'script-opportunities', - 'enhancement-opportunities': 'creative', -} - -SECTION_LABELS = { - 'structural': 'Structural', - 'structure-capabilities': 'Structure & Capabilities', - 'prompt-craft': 'Prompt Craft', - 'efficiency': 'Efficiency', - 'cohesion': 'Cohesion', - 'quality': 'Path & Script Standards', - 'scripts': 'Scripts', - 'script-opportunities': 'Script Opportunities', - 'creative': 'Creative & Enhancements', -} +def load_report_data(report_dir: Path) -> dict: + """Load report-data.json from the report directory.""" + data_file = report_dir / 'report-data.json' + if not data_file.exists(): + print(f'Error: {data_file} not found', file=sys.stderr) + sys.exit(2) + return json.loads(data_file.read_text(encoding='utf-8')) -def _coalesce(*values) -> str: - """Return the first truthy string value, or empty string.""" - for v in values: - if v and isinstance(v, str) and v.strip() and v.strip() not in ('N/A', 'n/a', 'None'): - return v.strip() - return '' - - -def _norm_severity(sev: str) -> str: - """Normalize severity to lowercase, handle variants.""" - if not sev: - return 'low' - s = sev.strip().lower() - # Map common variants - return { - 'high-opportunity': 'high-opportunity', - 'medium-opportunity': 'medium-opportunity', - 'low-opportunity': 'low-opportunity', - }.get(s, s) - - -def normalize_finding(f: dict, scanner: str, idx: int) -> dict: - """ - Normalize a single finding/issue dict into the unified item model. 
- - Handles all known field name variants across scanners: - Title: issue | title | description (fallback) - Desc: description | rationale | observation | insight | scenario | - current_behavior | current_pattern | context | nuance - Action: fix | recommendation | suggestion | suggested_approach | - efficient_alternative | script_alternative - File: file | location | current_location - Line: line | lines - Cat: category | dimension - Impact: user_impact | impact | estimated_savings | estimated_token_savings - """ - sev = _norm_severity(f.get('severity', 'low')) - section = SCANNER_SECTIONS.get(scanner, 'other') - - # Determine item type from severity - if sev in ('strength', 'note') or f.get('category') == 'strength': - item_type = 'strength' - action_type = 'none' - selectable = False - elif sev.endswith('-opportunity'): - item_type = 'enhancement' - action_type = 'enhance' - selectable = True - elif f.get('category') == 'suggestion' or sev == 'suggestion': - item_type = 'suggestion' - action_type = 'refactor' - selectable = True - else: - item_type = 'issue' - action_type = 'fix' - selectable = True - - # --- Title: prefer 'title', fall back to old field names --- - title = _coalesce( - f.get('title'), - f.get('issue'), - _truncate(f.get('scenario', ''), 150), - _truncate(f.get('current_behavior', ''), 150), - _truncate(f.get('description', ''), 150), - f.get('observation', ''), - ) - if not title: - title = f.get('id', 'Finding') - - # --- Detail/description: prefer 'detail', fall back to old field names --- - description = _coalesce(f.get('detail')) - if not description: - # Backward compat: coalesce old field names - desc_candidates = [] - for key in ('description', 'rationale', 'observation', 'insight', 'scenario', - 'current_behavior', 'current_pattern', 'context', 'nuance', - 'assessment'): - v = f.get(key) - if v and isinstance(v, str) and v.strip() and v != title: - desc_candidates.append(v.strip()) - description = ' '.join(desc_candidates) if 
desc_candidates else '' - - # --- Action: prefer 'action', fall back to old field names --- - action = _coalesce( - f.get('action'), - f.get('fix'), - f.get('recommendation'), - f.get('suggestion'), - f.get('suggested_approach'), - f.get('efficient_alternative'), - f.get('script_alternative'), - ) - - # --- File reference --- - file_ref = _coalesce( - f.get('file'), - f.get('location'), - f.get('current_location'), - ) - - # --- Line reference --- - line = f.get('line') - if line is None: - lines_str = f.get('lines') - if lines_str: - line = str(lines_str) - - # --- Category --- - category = _coalesce( - f.get('category'), - f.get('dimension'), - ) - - # --- Impact (backward compat only - new schema folds into detail) --- - impact = _coalesce( - f.get('user_impact'), - f.get('impact'), - f.get('estimated_savings'), - str(f.get('estimated_token_savings', '')) if f.get('estimated_token_savings') else '', - ) - - # --- Extra fields for specific scanners --- - extra = {} - if scanner == 'script-opportunities': - action_type = 'create-script' - for k in ('determinism_confidence', 'implementation_complexity', - 'language', 'could_be_prepass', 'reusable_across_skills'): - if k in f: - extra[k] = f[k] - - # Use scanner-provided id if available - item_id = f.get('id', f'{scanner}-{idx:03d}') - - return { - 'id': item_id, - 'scanner': scanner, - 'section': section, - 'type': item_type, - 'severity': sev, - 'rank': SEVERITY_RANK.get(sev, 3), - 'category': category, - 'file': file_ref, - 'line': line, - 'title': title, - 'description': description, - 'action': action, - 'impact': impact, - 'extra': extra, - 'selectable': selectable, - 'action_type': action_type, - } - - -def _truncate(text: str, max_len: int) -> str: - """Truncate text to max_len, breaking at sentence boundary if possible.""" - if not text: - return '' - text = text.strip() - if len(text) <= max_len: - return text - # Try to break at sentence boundary - for end in ('. 
', '.\n', ' — ', '; '): - pos = text.find(end) - if 0 < pos < max_len: - return text[:pos + 1].strip() - return text[:max_len].strip() + '...' - - -def normalize_scanner(data: dict) -> tuple[list[dict], dict]: - """ - Normalize a full scanner JSON into (items, meta). - Returns list of normalized items + dict of meta/assessment data. - Handles all known scanner output variants. - """ - scanner = data.get('scanner', 'unknown') - items = [] - meta = {} - - # New schema: findings[]. Backward compat: issues[] or findings[] - findings = data.get('findings') or data.get('issues') or [] - for idx, f in enumerate(findings): - items.append(normalize_finding(f, scanner, idx)) - - # Backward compat: opportunities[] (execution-efficiency had separate array) - for idx, opp in enumerate(data.get('opportunities', []), start=len(findings)): - opp_item = normalize_finding(opp, scanner, idx) - opp_item['type'] = 'enhancement' - opp_item['action_type'] = 'enhance' - opp_item['selectable'] = True - items.append(opp_item) - - # Backward compat: strengths[] (old cohesion scanners — plain strings) - for idx, s in enumerate(data.get('strengths', [])): - text = s if isinstance(s, str) else (s.get('title', '') if isinstance(s, dict) else str(s)) - desc = '' if isinstance(s, str) else (s.get('description', s.get('detail', '')) if isinstance(s, dict) else '') - items.append({ - 'id': f'{scanner}-str-{idx:03d}', - 'scanner': scanner, - 'section': SCANNER_SECTIONS.get(scanner, 'cohesion'), - 'type': 'strength', - 'severity': 'strength', - 'rank': 5, - 'category': 'strength', - 'file': '', - 'line': None, - 'title': text, - 'description': desc, - 'action': '', - 'impact': '', - 'extra': {}, - 'selectable': False, - 'action_type': 'none', - }) - - # Backward compat: creative_suggestions[] (old cohesion scanners) - for idx, cs in enumerate(data.get('creative_suggestions', [])): - if isinstance(cs, str): - cs_title, cs_desc = cs, '' - else: - cs_title = _coalesce(cs.get('title'), cs.get('idea'), '') 
- cs_desc = _coalesce(cs.get('description'), cs.get('detail'), cs.get('rationale'), '') - items.append({ - 'id': cs.get('id', f'{scanner}-cs-{idx:03d}') if isinstance(cs, dict) else f'{scanner}-cs-{idx:03d}', - 'scanner': scanner, - 'section': SCANNER_SECTIONS.get(scanner, 'cohesion'), - 'type': 'suggestion', - 'severity': 'suggestion', - 'rank': 4, - 'category': cs.get('type', 'suggestion') if isinstance(cs, dict) else 'suggestion', - 'file': '', - 'line': None, - 'title': cs_title, - 'description': cs_desc, - 'action': cs_title, - 'impact': cs.get('estimated_impact', '') if isinstance(cs, dict) else '', - 'extra': {}, - 'selectable': True, - 'action_type': 'refactor', - }) - - # New schema: assessments{} contains all structured analysis - # Backward compat: also collect from top-level keys - if 'assessments' in data: - meta.update(data['assessments']) - - # Backward compat: collect meta from top-level keys - skip_keys = {'scanner', 'script', 'version', 'skill_path', 'agent_path', - 'timestamp', 'scan_date', 'status', 'issues', 'findings', - 'strengths', 'creative_suggestions', 'opportunities', 'assessments'} - for key, val in data.items(): - if key not in skip_keys and key not in meta: - meta[key] = val - - return items, meta - - -def build_journeys(data: dict) -> list[dict]: - """ - Extract user journey data from enhancement-opportunities scanner. 
- Handles two formats: - - Array of objects: [{archetype, journey_summary, friction_points, bright_spots}] - - Object keyed by persona: {first_timer: {entry_friction, mid_flow_resilience, exit_satisfaction}} - """ - journeys_raw = data.get('user_journeys') - if not journeys_raw: - return [] - - # Format 1: already a list — normalize field names - if isinstance(journeys_raw, list): - normalized = [] - for j in journeys_raw: - if isinstance(j, dict): - normalized.append({ - 'archetype': j.get('archetype', 'unknown'), - 'journey_summary': j.get('summary', j.get('journey_summary', '')), - 'friction_points': j.get('friction_points', []), - 'bright_spots': j.get('bright_spots', []), - }) - else: - normalized.append(j) - return normalized - - # Format 2: object keyed by persona name - if isinstance(journeys_raw, dict): - result = [] - for persona, details in journeys_raw.items(): - if isinstance(details, dict): - # Convert the dict-based format to the expected format - journey = { - 'archetype': persona.replace('_', ' ').title(), - 'journey_summary': '', - 'friction_points': [], - 'bright_spots': [], - } - # Map known sub-keys to friction/bright spots - for key, val in details.items(): - if isinstance(val, str): - # Heuristic: negative-sounding keys → friction, positive → bright - if any(neg in key.lower() for neg in ('friction', 'issue', 'problem', 'gap', 'pain')): - journey['friction_points'].append(val) - elif any(pos in key.lower() for pos in ('bright', 'strength', 'satisfaction', 'delight')): - journey['bright_spots'].append(val) - else: - # Neutral keys — include as summary parts - if journey['journey_summary']: - journey['journey_summary'] += f' | {key}: {val}' - else: - journey['journey_summary'] = f'{key}: {val}' - elif isinstance(val, list): - for item in val: - if isinstance(item, str): - journey['friction_points'].append(item) - # Build summary from all fields if not yet set - if not journey['journey_summary']: - parts = [] - for k, v in details.items(): - if 
isinstance(v, str): - parts.append(f'**{k.replace("_", " ").title()}:** {v}') - journey['journey_summary'] = ' | '.join(parts) if parts else str(details) - result.append(journey) - elif isinstance(details, str): - result.append({ - 'archetype': persona.replace('_', ' ').title(), - 'journey_summary': details, - 'friction_points': [], - 'bright_spots': [], - }) - return result - - return [] - - -# ============================================================================= -# Report Data Assembly -# ============================================================================= - -def load_report_data(report_dir: Path, skill_path: str | None) -> dict: - """Load all temp/prepass JSONs and assemble normalized report data.""" - all_items = [] - all_meta = {} - journeys = [] - detected_skill_path = skill_path - - # Read all JSON files - json_files = sorted(report_dir.glob('*.json')) - for jf in json_files: - try: - data = json.loads(jf.read_text(encoding='utf-8')) - except (json.JSONDecodeError, OSError): - continue - - if not isinstance(data, dict): - continue - - scanner = data.get('scanner', jf.stem.replace('-temp', '').replace('-prepass', '')) - - # Detect skill path from scanner data - if not detected_skill_path: - detected_skill_path = data.get('skill_path') or data.get('agent_path') - - # Only normalize temp files (not prepass) - if '-temp' in jf.name or jf.name in ('path-standards-temp.json', 'scripts-temp.json'): - items, meta = normalize_scanner(data) - all_items.extend(items) - all_meta[scanner] = meta - - if scanner == 'enhancement-opportunities': - journeys = build_journeys(data) - elif '-prepass' in jf.name: - all_meta[f'prepass-{scanner}'] = data - - # Sort items: severity rank first, then section - all_items.sort(key=lambda x: (x['rank'], x['section'])) - - # Build severity counts - counts = {'critical': 0, 'high': 0, 'medium': 0, 'low': 0} - for item in all_items: - if item['type'] == 'issue' and item['severity'] in counts: - counts[item['severity']] += 1 
- - enhancement_count = sum(1 for i in all_items if i['type'] == 'enhancement') - strength_count = sum(1 for i in all_items if i['type'] == 'strength') - total_issues = sum(counts.values()) - - # Quality grade - if counts['critical'] > 0: - grade = 'Poor' - elif counts['high'] > 2: - grade = 'Fair' - elif counts['high'] > 0 or counts['medium'] > 5: - grade = 'Good' - else: - grade = 'Excellent' - - # Extract assessments for display - assessments = {} - for scanner_key, meta in all_meta.items(): - for akey in ('cohesion_analysis', 'autonomous_assessment', 'skill_understanding', - 'agent_identity', 'skill_identity', 'prompt_health', - 'skillmd_assessment', 'top_insights'): - if akey in meta: - assessments[akey] = meta[akey] - if 'summary' in meta: - s = meta['summary'] - if 'craft_assessment' in s: - assessments['craft_assessment'] = s['craft_assessment'] - if 'overall_cohesion' in s: - assessments['overall_cohesion'] = s['overall_cohesion'] - - # Skill name from path - sp = detected_skill_path or str(report_dir) - skill_name = Path(sp).name - - return { - 'meta': { - 'skill_name': skill_name, - 'skill_path': detected_skill_path or '', - 'timestamp': datetime.now(timezone.utc).isoformat(), - 'scanner_count': len([f for f in json_files if '-temp' in f.name]), - 'report_dir': str(report_dir), - }, - 'executive_summary': { - 'total_issues': total_issues, - 'counts': counts, - 'enhancement_count': enhancement_count, - 'strength_count': strength_count, - 'grade': grade, - 'craft_assessment': assessments.get('craft_assessment', ''), - 'overall_cohesion': assessments.get('overall_cohesion', ''), - }, - 'items': all_items, - 'journeys': journeys, - 'assessments': assessments, - 'section_labels': SECTION_LABELS, - } - - -# ============================================================================= -# HTML Generation -# ============================================================================= - HTML_TEMPLATE = r""" -Quality Report: SKILL_NAME_PLACEHOLDER +BMad Method · 
Quality Analysis: SKILL_NAME -

Quality Report:

+
BMad Method
+

Quality Analysis:

-
+
+
+
-
- -
- - +
+
+
+
+
+
@@ -927,63 +489,34 @@ def load_report_data(report_dir: Path, skill_path: str | None) -> dict: def generate_html(report_data: dict) -> str: - """Inject report data into the HTML template.""" data_json = json.dumps(report_data, indent=None, ensure_ascii=False) - # Embed the JSON as a script tag before the main script data_tag = f'' - # Insert before the main + +""" + + +def generate_html(report_data: dict) -> str: + """Inject report data into the HTML template.""" + data_json = json.dumps(report_data, indent=None, ensure_ascii=False) + data_tag = f'' + html = HTML_TEMPLATE.replace( + ' @@ -929,30 +476,20 @@ def load_report_data(report_dir: Path, skill_path: str | None) -> dict: def generate_html(report_data: dict) -> str: """Inject report data into the HTML template.""" data_json = json.dumps(report_data, indent=None, ensure_ascii=False) - # Embed the JSON as a script tag before the main script data_tag = f'' - # Insert before the main ', 'original_source': '', 'timestamp': ''}, + 'metrics': {'original': {}, 'rebuilt': {}}, + 'reductions': {}, + 'cuts': [], + 'retained': [], + 'verdict': '', + } + html = generate_html(report_data) + # The skill name in the JSON should be escaped by json.dumps + assert '