diff --git a/.claude/.gitignore b/.claude/.gitignore new file mode 100644 index 00000000..93c0f73f --- /dev/null +++ b/.claude/.gitignore @@ -0,0 +1 @@ +settings.local.json diff --git a/.claude/behavior.md b/.claude/behavior.md deleted file mode 100644 index 9d3be98e..00000000 --- a/.claude/behavior.md +++ /dev/null @@ -1,44 +0,0 @@ -# Claude-Specific Behavior Guidelines - -## Core Behavior Pattern - -**ALWAYS check CLAUDE.md and .development/ FIRST** before analyzing the codebase or answering questions about established patterns. - -## When You Should Update Documentation - -### Discovering New Patterns -- Code conventions not documented in `.development/patterns.md` -- Architecture decisions not covered in `.development/architecture.md` -- Development workflows not in `.development/workflows.md` -- Documentation practices not in `.development/documentation.md` - -### Making Implementation Decisions -- Update `.development/decisions/` with RFC-style decision records -- Capture rationale for future reference -- Include concrete examples and alternatives considered - -## Multi-Session Work Tracking - -Use `.claude/ongoing/` for work spanning multiple sessions: - -### File Naming -`ongoing_feature_name.md` - One file per major task - -### Required Content -- **Status**: Current state (planning, implementing, testing, etc.) 
-- **Context**: Background and motivation -- **Work Completed**: What's been done so far -- **Next Steps**: Specific tasks to continue -- **Code References**: File paths and line numbers for relevant code - -### Lifecycle -- **Create** when starting multi-session work -- **Update** at end of each session -- **Remove** when work is complete - -## Interaction Guidelines - -- Reference specific files and line numbers when discussing code -- Use format `file_path:line_number` for easy navigation -- Check existing documentation before providing explanations -- Update documentation when discovering gaps or new patterns \ No newline at end of file diff --git a/.claude/skills/author-code/SKILL.md b/.claude/skills/author-code/SKILL.md new file mode 100644 index 00000000..083d4298 --- /dev/null +++ b/.claude/skills/author-code/SKILL.md @@ -0,0 +1,88 @@ +--- +name: author-code +description: Conventions for authoring Rust code in the Dada compiler. Use when writing or modifying Rust code, adding functions, or making implementation changes. +--- + +# Authoring Code in Dada + +## Code Style + +- **Match local conventions** — mimic the style of the file you're editing +- **Use existing utilities** — check for helper functions before writing new ones +- **Check dependencies first** — never assume a library is available; verify in Cargo.toml +- **Follow existing patterns** — look at neighboring code and match its style + +## Documentation + +### When to document +- **Complex functions** encoding Dada's semantics need thorough documentation +- **Self-evident code** — simple utility functions don't need extensive comments + +### Documentation pattern +- Use doc comments (`///`) with high-level explanation +- Include concrete Dada code examples showing the feature being implemented +- Use inline comments referencing back to the examples + +```rust +/// Type checks a method call expression like `obj.method(args)`. 
+/// +/// # Example +/// ```dada +/// let p = Point(x: 10, y: 20) +/// p.distance(other) # <-- we are type checking this +/// ``` +/// +/// The method resolution follows these steps: +/// 1. Determine the type of the receiver (`p`) +/// 2. Look up the method in the type's namespace +/// 3. Check argument compatibility +fn type_check_method_call(...) { + // Step 1: Get receiver type (Point in our example) + let receiver_ty = self.type_of(receiver); + + // Step 2: Resolve method - this handles the lookup of `distance` + // in the Point type's method table + let method = self.resolve_method(receiver_ty, method_name)?; +} +``` + +## Insight Comments (`💡`) + +Use `💡` comments to capture non-obvious constraints and reasoning for future sessions. + +### Format +- **Preamble comment** on functions: explain the overall algorithmic or architectural choice +- **Inline comments** at the start of logical blocks: explain reasoning for that block +- **Before modifying code with `💡` comments**: pause and consider whether the reasoning affects your planned changes + +### Decision boundaries + +Annotate non-obvious decisions — skip self-explanatory code: +- ❌ `// 💡: Using a loop to iterate through items` +- ✅ `// 💡: Using manual iteration instead of map() to handle partial failures gracefully` + +Document constraint-driven choices: +- ❌ `// 💡: Using async/await for the API call` +- ✅ `// 💡: Using async/await because this API has 2-second response times that would block the UI` + +Document tradeoffs and alternatives: +- ✅ `// 💡: Using Redis instead of in-memory cache because we need persistence across server restarts` + +Capture consistency requirements: +- ✅ `// 💡: Using Result pattern to match error handling in auth.rs and database.rs modules` + +### Guidelines +1. **Focus on decisions with alternatives** — if there was only one way to do it, don't annotate +2. **Update annotations when modifying code** — ensure reasoning still matches implementation +3. 
**Be concise but specific** — future sessions should understand the decision quickly + +## Error Handling + +- Use diagnostics infrastructure from `dada-ir-ast` +- Provide helpful error messages with source spans +- Follow existing error formatting patterns + +## References + +- Crate-level rustdoc — each crate has `docs/overview.md` included via `#![doc = include_str!(...)]` +- Run `cargo doc --open` to browse implementation docs diff --git a/.claude/skills/rfc-workflow/SKILL.md b/.claude/skills/rfc-workflow/SKILL.md new file mode 100644 index 00000000..9bf40a92 --- /dev/null +++ b/.claude/skills/rfc-workflow/SKILL.md @@ -0,0 +1,128 @@ +--- +name: rfc-workflow +description: RFC and specification workflow for Dada language features. Use when working with RFCs, writing spec paragraphs, or tracking implementation progress. +--- + +# RFC and Specification Workflow + +This skill covers the practical RFC and spec integration workflow. + +## RFC Directory Structure + +``` +rfcs/src/NNNN-feature-name/ +├── README.md # The RFC document (design, motivation, examples) +├── impl.md # Implementation progress tracking +└── todo.md # Session-specific work tracking and context +``` + +Create new RFCs with: `cargo xtask rfc new feature-name` + +## Spec Paragraph Authoring + +Spec paragraphs live in `spec/src/` using MyST directive syntax. + +### Block directives + +```markdown +:::{spec} local-name tag1 tag2 +Paragraph content describing one testable behavior. +::: +``` + +**Tags**: +- `rfcNNNN` — Links paragraph to an RFC (e.g., `rfc0001`) +- `unimpl` — Feature is specified but not yet implemented +- If the local name is omitted (the first token is a tag), the ID comes from the headings only + +### Inline sub-paragraphs + +Inside a block, mark sub-items with inline spec tags: + +```markdown +:::{spec} rfc0001 +String literals support these escape sequences: + +* {spec}`backslash` `\\` produces a literal backslash. +* {spec}`newline` `\n` produces a newline. 
+::: +``` + +Each `` {spec}`name` `` creates a sub-paragraph with its own ID. Tags like `unimpl` can follow the name: `` {spec}`triple-quoted unimpl` ``. + +### Paragraph ID format + +``` +file-prefix.heading-segment.local-name.inline-name +``` + +- File prefix: from path (e.g., `syntax.string-literals`) +- Heading segments: H2+ headings, lowercased, spaces → hyphens +- Local name: from `:::{spec} local-name` +- Inline name: from `` {spec}`name` `` + +## Workflow: When to Put Spec Paragraphs Where + +**Design is mature** → Author directly in `spec/src/` with `rfcNNNN unimpl` tags. This is the preferred approach — it validates the spec structure early. + +**Design is still evolving** → Draft in the RFC's spec.md, then move to `spec/src/` during implementation. + +The key insight: if you know enough to write a spec paragraph, put it in the spec. The `unimpl` tag makes it clear it's not yet implemented. + +## Implementation Tracking + +### impl.md + +Track implementation progress in the RFC's impl.md: + +```markdown +# Implementation Progress + +## Status: In Progress + +### Completed +- [x] Spec paragraphs drafted in spec/src/ +- [x] Basic string literal parsing + +### In Progress +- [ ] Triple-quoted string support + +### Not Started +- [ ] String interpolation +``` + +### todo.md + +Track session-specific context in todo.md: + +```markdown +# Current Session + +## Focus +What we're working on right now + +## Next Steps +- Specific actionable items + +## Open Questions +- Things still being figured out +``` + +## Cross-Referencing + +- **Tests → Spec**: `#:spec syntax.string-literals.escape-sequences.backslash` +- **Spec → RFC**: `rfc0001` tag on `:::{spec}` directives +- **RFC → Spec**: Reference spec section in RFC README.md + +Keep these synchronized. When adding a new spec paragraph, check if tests exist. When writing tests, add the `#:spec` annotation. 
+ +## Implementation Workflow + +When implementing an RFC feature, follow this cycle for each piece of work: + +1. Implement the feature in the compiler +2. Write tests with `#:spec` annotations +3. Remove `unimpl` from the spec paragraph tag +4. **Update the RFC's `impl.md`** — check off completed items, add new items discovered during implementation + +Keep `impl.md` current as you go. It's the living record of what's done and what's next — don't wait until the end of a session to update it. diff --git a/.claude/skills/run-tests/SKILL.md b/.claude/skills/run-tests/SKILL.md new file mode 100644 index 00000000..29037012 --- /dev/null +++ b/.claude/skills/run-tests/SKILL.md @@ -0,0 +1,109 @@ +--- +name: run-tests +description: Run and interpret Dada test results. Use when running tests, debugging test failures, or understanding test output. +--- + +# Running Dada Tests + +## Commands + +```bash +# Run all tests +cargo dada test --porcelain + +# Run tests in a directory +cargo dada test --porcelain tests/syntax/string_literals/ + +# Run a single test file +cargo dada test --porcelain tests/syntax/string_literals/type.dada +``` + +Always use `--porcelain` for machine-readable JSON output with structured failure information. + +## Interpreting Output + +The `--porcelain` flag produces JSON with this structure: + +```json +{ + "summary": { "total": 45, "passed": 45, "failed": 0, "duration_ms": 70 }, + "tests": [ + { + "path": "tests/syntax/string_literals/type.dada", + "status": "pass", + "annotations": ["#:skip_codegen", "#:spec syntax.string-literals.type"] + } + ] +} +``` + +For failures, each test includes: +- `suggestion` — Actionable guidance on how to resolve the failure. **Read this first.** +- `details` — Usually points to a `.test-report.md` file alongside the test + +## Reading Test Reports + +When a test fails, a `.test-report.md` file is generated next to the test file. It contains: + +1. **Compiler output** — The actual errors/warnings produced +2. 
**Unexpected diagnostics** — Errors the compiler produced that the test didn't expect +3. **Missing expected diagnostics** — `#!` annotations that didn't match any compiler output +4. **Probe failures** — `#?` probes that got unexpected results +5. **Next Steps** — Suggestions for resolution + +## Test Annotation Reference + +### Spec references +```dada +#:spec syntax.string-literals.escape-sequences.backslash +``` +Links the test to a spec paragraph. Validated against actual spec files. + +### Directives +```dada +#:skip_codegen # Skip WebAssembly generation (use for parser/type-check only tests) +#:fn_asts # Compare function AST output against .ref file +``` + +### Diagnostic expectations (`#!`) + +**Without carets** — error can start anywhere on the previous interesting line: +```dada +print(unknown_var) +#! could not find anything named `unknown_var` +``` + +**With carets** — error span must match exactly (caret position = column on previous line): +```dada +fn test() { bad_name() } +#! ^^^^^^^^ could not find anything named `bad_name` +``` + +**With regex** — use `/` prefix (opening `/` only, NO closing `/`): +```dada +is_shared(x.mut) +#! /where clause.*not satisfied +``` + +> **Important**: The regex convention uses `/pattern` with NO closing `/`. Including a closing `/` causes the `/` to be part of the regex pattern, which will fail to match. + +### Type probes (`#?`) + +```dada +let x = 22 + 44 +#? ^^ ExprType: u32 # Type of expression at that span +#? ^ VariableType: u32 # Type of the variable +``` + +Caret position must align with the target on the previous line. `VariableType` shows the declared type (e.g., `String` not `my String` — the permission is on the expression, not the variable). + +Probes also support regex with `/`: +```dada +#? 
^ VariableType: /my.*String +``` + +## Common Patterns + +- Tests that only exercise parsing/type-checking should use `#:skip_codegen` +- Error tests use `#!` annotations; the test passes when all expected errors match and no unexpected errors appear +- Probe tests use `#?` annotations; the test passes when all probes return expected values diff --git a/.claude/skills/tracking-issues/SKILL.md b/.claude/skills/tracking-issues/SKILL.md new file mode 100644 index 00000000..4734f491 --- /dev/null +++ b/.claude/skills/tracking-issues/SKILL.md @@ -0,0 +1,89 @@ +--- +name: tracking-issues +description: Track context across sessions for long-running features. Use when starting multi-session work, checkpointing progress, or resuming work on a feature tracked in a GitHub issue. +--- + +# Tracking Long-Running Work + +Use GitHub issues as living documents to maintain context across work sessions. One issue per user-facing feature. + +## Quick Reference + +```bash +# Find active work +gh issue list --label tracking-issue + +# Check a specific issue +gh issue view <number> +``` + +## When to Use + +**Not for RFC features.** RFC-tracked work uses `impl.md` in the RFC directory for detailed progress tracking. A GitHub issue for an RFC should just be a lightweight pointer with links to the RFC and its impl status (e.g., `https://dada-lang.org/rfcs/NNNN-feature-name/impl.html`). + +For non-RFC work (refactors, bug investigations, infrastructure) that spans 2+ sessions or multiple code areas, use a tracking issue. 
+ +## Issue Structure + +**Labels**: `tracking-issue`, `ai-managed`, plus type (`feature`, `bug`, `refactor`) + +**Title**: Clear user-facing outcome (not "encryption work" — instead "Implement client-side encryption") + +**OP template** (keep updated as the living summary): + +```markdown +# Feature Name + +**Status**: Planning | In Progress | Blocked | Complete + +## Current Understanding +Brief summary of what needs to be done and current approach + +## Next Steps +- [ ] Specific actionable item with file:line references +- [ ] Another concrete next step + +## Open Questions +- What we're still figuring out + +## Context +Key background and why this work matters now +``` + +## Working with Issues + +### Starting a session +Read the issue OP to understand current state. Work from "Next Steps." + +### During work +- **Update OP** when: approach changes, major blockers discovered, next steps shift +- **Add comments** when: completing work sessions, discovering insights, hitting roadblocks + +### Checkpointing +1. Find relevant tracking issue +2. Draft a comment summarizing the session +3. Show draft to user for approval before posting +4. Update OP if approach or next steps changed + +### Comment structure + +```markdown +**Session summary:** +- What was attempted or explored +- Key discoveries or problems encountered + +**Impact on approach:** +- How understanding changed +- New questions that emerged + +**Progress:** Completed items from next steps, what's next +``` + +### Completion +Set status to "Complete" and close the issue. 
+ +## Boundaries + +- Only modify issues labeled `ai-managed` +- Always get user approval before posting comments or editing the OP +- Reference issues in commit messages when relevant diff --git a/.claude/skills/write-tests/SKILL.md b/.claude/skills/write-tests/SKILL.md new file mode 100644 index 00000000..e7ef3f8f --- /dev/null +++ b/.claude/skills/write-tests/SKILL.md @@ -0,0 +1,127 @@ +--- +name: write-tests +description: Write spec-aligned Dada tests. Use when creating new test files, organizing tests to match the specification, or adding test coverage for language features. +--- + +# Writing Spec-Aligned Dada Tests + +## Directory Structure + +Tests mirror the spec directory structure under `tests/`: + +``` +spec/src/syntax/string-literals.md → tests/syntax/string_literals/ +spec/src/syntax/literals.md → tests/syntax/literals/ +``` + +Within each test directory, organize by spec section: + +``` +tests/syntax/string_literals/ +├── delimiters/ +│ └── quoted.dada # #:spec syntax.string-literals.delimiters.quoted +├── type.dada # #:spec syntax.string-literals.type +├── escape_sequences/ +│ ├── backslash.dada # #:spec syntax.string-literals.escape-sequences.backslash +│ ├── invalid.dada # #:spec syntax.string-literals.escape-sequences.invalid +│ └── ... +└── interpolation/ + └── brace_escaping.dada # #:spec syntax.string-literals.interpolation.brace-escaping +``` + +Ad-hoc tests that don't correspond to a spec paragraph go in `tests/adhoc/`. + +## Spec Paragraph ID Resolution + +IDs are built from the spec file path and headings: + +1. **File prefix**: `spec/src/syntax/string-literals.md` → `syntax.string-literals` + - `README.md` is special: only the parent directory becomes the prefix +2. **Heading segments**: H2+ headings, lowercased, spaces/underscores → hyphens + - H1 is skipped (it's the page title, already in the file prefix) +3. 
**Block local name**: From `:::{spec} local_name` directive + - If the first token looks like a tag (`rfc0001`, `unimpl`), there's no local name +4. **Inline sub-paragraph**: From `` {spec}`name` `` inside a block + +### Examples + +```markdown +# String Literals ← H1: skipped (in file prefix) +## Escape Sequences ← H2: "escape-sequences" +:::{spec} rfc0001 ← Block: no local name (rfc0001 is a tag) +* {spec}`backslash` `\\` produces... ← Inline: "backslash" +::: +:::{spec} invalid rfc0001 ← Block: local name = "invalid" +::: +``` + +Resulting IDs: +- `syntax.string-literals.escape-sequences` (the block) +- `syntax.string-literals.escape-sequences.backslash` (inline) +- `syntax.string-literals.escape-sequences.invalid` (named block) + +## Writing Test Files + +### Basic test structure +```dada +#:spec syntax.string-literals.delimiters.quoted +#:skip_codegen + +async fn main() { + print("hello").await + print("").await +} +``` + +### Error test (diagnostic expectations) +```dada +#:spec syntax.string-literals.escape-sequences.invalid +#:skip_codegen + +async fn main() { + print("\a").await + #! ^ /invalid escape +} +``` + +The `^` must be at the exact column of the error span on the previous line. Use `/pattern` (NO closing `/`) for regex matching. + +### Type probe test +```dada +#:spec syntax.string-literals.type +#:skip_codegen + +fn main() { + let x = "hello" + #? ^ VariableType: String +} +``` + +`VariableType` shows the declared type without permissions. Use `ExprType` for expression types. Multiple `^^` carets match multi-byte spans. + +## Known Issues + +### Brace escaping in strings +The tokenizer's `delimited()` function doesn't skip over string literal contents when scanning for matching braces. Unbalanced `{` or `}` inside strings will confuse brace-depth tracking. 
**Workaround**: Use balanced `\{...\}` pairs in test strings: + +```dada +# Good — balanced braces +print("\{\}").await +print("hello\{world\}").await + +# Bad — unbalanced brace causes parse errors +print("\{").await +``` + +This will be fixed when string interpolation is implemented. + +## Checklist for New Tests + +1. Identify the spec paragraph ID for the feature being tested +2. Create the test file in the matching directory structure +3. Add `#:spec <paragraph-id>` annotation +4. Add `#:skip_codegen` if the test doesn't need WebAssembly generation +5. Write test code exercising the feature +6. Add `#!` annotations for expected errors or `#?` probes for type checking +7. Run with `cargo dada test --porcelain <path>` to verify +8. Check `.test-report.md` if the test fails diff --git a/.development/architecture.md b/.development/architecture.md deleted file mode 100644 index 4c1c547d..00000000 --- a/.development/architecture.md +++ /dev/null @@ -1,48 +0,0 @@ -# Dada Compiler Architecture - -## Overview - -The Dada compiler is built as a Cargo workspace using the Salsa incremental computation framework. The compiler transforms source code through multiple intermediate representations. 
- -## Compilation Pipeline - -```text -Source Code (.dada files) - ↓ -[dada-parser] → Tokens → AST - ↓ -[dada-ir-ast] → AST with spans and diagnostics - ↓ -[dada-ir-sym] → Symbolic IR (type-checked, high-level) - ↓ -[dada-check] → Type checking orchestration - ↓ -[dada-codegen] → WebAssembly (incomplete) -``` - -## Key Components - -### Core Pipeline Crates - -- **[`dada-parser`](https://dada-lang.org/impl/dada_parser/)** - Tokenization and parsing -- **[`dada-ir-ast`](https://dada-lang.org/impl/dada_ir_ast/)** - AST representation -- **[`dada-ir-sym`](https://dada-lang.org/impl/dada_ir_sym/)** - Symbolic IR and type system -- **[`dada-check`](https://dada-lang.org/impl/dada_check/)** - Type checking orchestration -- **[`dada-codegen`](https://dada-lang.org/impl/dada_codegen/)** - WebAssembly generation - -### Supporting Infrastructure - -- **[`dada-lang`](https://dada-lang.org/impl/dada_lang/)** - CLI entry point -- **[`dada-compiler`](https://dada-lang.org/impl/dada_compiler/)** - Compilation orchestration -- **[`dada-debug`](https://dada-lang.org/impl/dada_debug/)** - Debug server -- **[`dada-lsp-server`](https://dada-lang.org/impl/dada_lsp_server/)** - Language Server Protocol -- **[`dada-util`](https://dada-lang.org/impl/dada_util/)** - Shared utilities - -## Documentation Approach - -Implementation details are documented in rustdoc within each crate. The documentation uses: -- Module-level docs with `//!` comments -- External markdown files included via `#![doc = include_str!("../docs/overview.md")]` -- Comprehensive explanations of algorithms and design decisions - -To explore implementation details, visit the crate documentation links above or run `just doc-open` locally. 
\ No newline at end of file diff --git a/.development/documentation.md b/.development/documentation.md deleted file mode 100644 index 7edf5ea1..00000000 --- a/.development/documentation.md +++ /dev/null @@ -1,68 +0,0 @@ -# Documentation Guidelines - -## Rustdoc Structure - -The compiler uses rustdoc for comprehensive documentation: - -- **Module-level docs** - Use `//!` comments at the top of files -- **External markdown** - Include via `#![doc = include_str!("../docs/overview.md")]` -- **Cross-references** - Link between items using rustdoc syntax - -### Documentation Files -Each crate can have a `docs/` directory containing: -- `overview.md` - High-level introduction to the crate -- Topic-specific files (`type_checking.md`, `permissions.md`, etc.) - -These are included in the module documentation and appear in generated rustdoc. - -## Link Conventions - -### Cross-Crate Links -Use regular markdown links: `[text](../crate_name)` - -### Intra-Crate Links -Use rustdoc links with backticks: `[item](`path::to::item`)` - -Examples: -- `[MyStruct](`crate::module::MyStruct`)` -- `[method](`Self::method_name`)` - -## Code Blocks - -Always specify the language: -- ` ```rust` - For Rust code -- ` ```dada` - For Dada code examples -- ` ```text` - For output, errors, or mixed content -- ` ```bash` - For shell commands - -## Writing Style - -- **Factual tone** - Describe what code does, not its quality -- **Concrete examples** - Use Dada code to illustrate concepts -- **Clear structure** - Organize with headers and sections -- **Avoid adjectives** - Skip words like "powerful", "elegant", "robust" - -## Building Documentation - -```bash -# Generate all docs -just doc - -# Generate and open in browser -just doc-open - -# Serve locally at http://localhost:8000 -just doc-serve - -# Manual generation -cargo doc --workspace --no-deps --document-private-items -``` - -## Documentation Coverage - -Major areas that should be documented: -- Compilation pipeline and phases -- Type system 
and inference -- Permission system -- Error handling and recovery -- Language semantics \ No newline at end of file diff --git a/.development/patterns.md b/.development/patterns.md deleted file mode 100644 index e5d49706..00000000 --- a/.development/patterns.md +++ /dev/null @@ -1,67 +0,0 @@ -# Code Patterns and Conventions - -This document describes established patterns in the Dada codebase. When contributing, follow these conventions for consistency. - -## General Principles - -- **Follow existing patterns** - Look at neighboring code and match its style -- **Check dependencies first** - Never assume a library is available; verify in Cargo.toml -- **Security first** - Never expose or log secrets; never commit keys - -## Code Style - -- **Match local conventions** - Mimic the style of the file you're editing -- **Use existing utilities** - Check for helper functions before writing new ones - -## Documentation Style - -### When to Document -- **Complex functions** - Functions encoding Dada's semantics need thorough documentation -- **Self-evident code** - Simple utility functions don't need extensive comments - -### Documentation Pattern -- **Function-level docs** - Use doc comments (`///`) to explain high-level functionality -- **Concrete examples** - Include Dada code examples showing the feature being implemented -- **Implementation comments** - Use inline comments to explain specific parts, referencing back to the examples - -Example: -```rust -/// Type checks a method call expression like `obj.method(args)`. -/// -/// # Example -/// ```dada -/// let p = Point(x: 10, y: 20) -/// p.distance(other) # <-- we are type checking this -/// ``` -/// -/// The method resolution follows these steps: -/// 1. Determine the type of the receiver (`p`) -/// 2. Look up the method in the type's namespace -/// 3. Check argument compatibility -fn type_check_method_call(...) 
{ - // Step 1: Get receiver type (Point in our example) - let receiver_ty = self.type_of(receiver); - - // Step 2: Resolve method - this handles the lookup of `distance` - // in the Point type's method table - let method = self.resolve_method(receiver_ty, method_name)?; -} -``` - -## Testing Patterns - -- Test files go in `tests/` with `.dada` extension -- Use `#:skip_codegen` for tests that don't need WebAssembly generation -- Parser tests in `tests/parser/` -- Type checking tests in `tests/type_check/` -- Experimental features in `tests/spikes/` - -## Error Handling - -- Use diagnostics infrastructure from `dada-ir-ast` -- Provide helpful error messages with source spans -- Follow existing error formatting patterns - -## Documentation - -See [documentation.md](./documentation.md) for rustdoc guidelines. \ No newline at end of file diff --git a/.development/rfc.md b/.development/rfc.md deleted file mode 100644 index 248f9399..00000000 --- a/.development/rfc.md +++ /dev/null @@ -1,94 +0,0 @@ -# RFC Workflow - -## Overview -Dada uses an RFC (Request for Comments) process for proposing and tracking language changes. The workflow involves three documentation sites: - -- **RFCs** (`rfcs/` → `dada-lang.org/rfcs`) - Design proposals and decisions -- **Specification** (`spec/` → `dada-lang.org/spec`) - Authoritative language specification -- **User Docs** (`book/` → `dada-lang.org`) - Tutorials and guides - -## RFC Structure -Each RFC lives in its own directory under `rfcs/src/`: - -``` -rfcs/src/ - SUMMARY.md # Table of contents, organized by status - 0000-template/ # Template for new RFCs - 0001-feature-name/ - README.md # The RFC document - impl.md # Implementation progress tracking - spec.md # Draft specification text - examples/ # Example code (optional) -``` - -## Creating an RFC - -### Quick Start -```bash -cargo xtask rfc new feature-name -``` - -This command will: -1. Find the next sequential RFC number automatically -2. 
Create directory `rfcs/src/NNNN-feature-name/` -3. Copy template files from `0000-template/` -4. Update `rfcs/src/SUMMARY.md` with your new RFC -5. Replace placeholders with your RFC number and title - -### Template -The RFC template is available in the RFC book at `/rfcs/` under "RFC-0000: Template". It includes: -- Complete RFC document structure with all required sections -- Implementation tracking template in `impl.md` -- Specification draft template in `spec.md` - -## Iterative Development -RFCs, implementation, and specification evolve together: - -1. **Design phase**: Focus on RFC document, capture ideas in spec.md -2. **Implementation**: Update impl.md with progress, refine spec.md based on experience -3. **Completion**: Integrate spec.md into main specification, update RFC status - -## Specification Guidelines -- Use semantic paragraph identifiers: `r[topic.subtopic.detail]` -- Each paragraph should specify one testable behavior -- Tests reference spec paragraphs: `#:spec topic.subtopic.detail` -- Examples: `r[syntax.string-literals.escape-sequences]`, `r[permissions.lease.transfer-rules]` - -## Cross-referencing -- RFCs can reference future spec sections -- Specs include non-normative RFC references for context -- Tests annotate which spec paragraphs they verify -- Keep paragraph IDs stable during reorganization - -## RFC Status Lifecycle -- **active**: Under discussion and design -- **accepted**: Design approved, ready for implementation -- **implemented**: Complete with working code -- **rejected**: Not proceeding (kept for historical record) -- **withdrawn**: Author chose not to proceed - -## Decision Process -@nikomatsakis acts as BDFL (Benevolent Dictator For Life) for RFC acceptance decisions. - -## RFC Authorship Style Guide - -When writing RFCs, follow these style preferences: - -### Content Principles -- **Minimal and focused** - Include only essential content. Three design tenets are better than five if they capture the core ideas. 
-- **Language features over implementation** - Focus on user-facing design decisions. Avoid discussing optimizations or implementation details unless central to the design. -- **Practical escape hatches** - Prefer simple, single-character solutions (like `"\`) over complex mechanisms. - -### Writing Examples -- **Executable over descriptive** - Use `assert` statements that can actually run rather than comments explaining results -- **Complete examples** - Include variable definitions so examples are self-contained - -### Specification Style -- **Atomic rules** - Write separate rules for each concept rather than compound rules -- **Clear precedence** - When syntax can be ambiguous (like `"""` vs empty string + quote), add explicit clarifying notes -- **Clean separation** - One rule for types, separate rules for syntax variations, so general rules don't need to enumerate all cases - -### Language Precision -- **Be specific** - "leading newline is preserved" is clearer than "preserved exactly as written" -- **Active voice** - "String literals have type `my String`" not "The type of string literals is `my String`" -- **Consistent terminology** - Pick terms and use them consistently throughout \ No newline at end of file diff --git a/.development/workflows.md b/.development/workflows.md deleted file mode 100644 index c2b7fbc3..00000000 --- a/.development/workflows.md +++ /dev/null @@ -1,77 +0,0 @@ -# Development Workflows - -## Building and Running - -### Basic Commands -```bash -# Run the Dada compiler -cargo dada run - -# Run tests -cargo dada test - -# Run a specific test file -cargo dada test tests/hello_world.dada - -# Compile without running -cargo dada compile - -# Debug mode with introspection server -cargo dada debug -``` - -### Using Just -```bash -# Run all tests -just test - -# Generate and open documentation -just doc-open - -# Serve documentation locally -just doc-serve -``` - -## Testing Workflow - -1. 
**Write test file** - Create `.dada` file in appropriate `tests/` subdirectory -2. **Run test** - Use `cargo dada test ` -3. **Check output** - Tests generate `.test-report.md` files with results -4. **Use directives** - Add `#:skip_codegen` if WebAssembly generation isn't needed - -## Documentation Workflow - -1. **Write docs in crate** - Add to module docs or `docs/*.md` files -2. **Build locally** - Run `just doc` to verify -3. **Check links** - Ensure cross-references work -4. **View result** - Use `just doc-open` to review - -## Adding New Language Features - -New language features follow an RFC-driven process: - -1. **Draft RFC** - Create `rfc/rfcNNNN_feature_name.md` describing user-facing behavior -2. **Supporting materials** - Add details in `rfc/rfcNNNN_feature_name/` subdirectory -3. **Discuss architecture** - Confirm major design decisions before implementation -4. **Implement feature** - Follow the RFC's agreed design -5. **Update documentation** - Document implementation in compiler rustdoc -6. **Update language spec** - Add feature to language specification when complete - -The RFC should focus on motivation, user experience, and examples. Implementation details can evolve during development. - -## Fixing Bugs - -1. Add failing test case -2. Debug using `cargo dada debug` -3. Fix the issue -4. Verify test passes -5. 
Check for regressions with `just test` - -## Debugging the Compiler - -The debug server allows introspection of compiler internals: - -```bash -cargo dada debug examples/hello.dada -# Opens web UI showing compilation steps -``` \ No newline at end of file diff --git a/.gitignore b/.gitignore index 46c4f5b8..80d9258c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,4 @@ target -.claude .vscode playground dada_debug diff --git a/.socratic-shell/.commit-stamp b/.socratic-shell/.commit-stamp deleted file mode 100644 index 6a9f1fd9..00000000 --- a/.socratic-shell/.commit-stamp +++ /dev/null @@ -1 +0,0 @@ -8c51ae2ff802c1743755bcbd8c1220247970ebc1 diff --git a/.socratic-shell/README.md b/.socratic-shell/README.md deleted file mode 100644 index 592b3208..00000000 --- a/.socratic-shell/README.md +++ /dev/null @@ -1,27 +0,0 @@ -# Project Collaboration Patterns - -This directory contains collaboration patterns designed to be installed per project. - -The installation step imports these scripts into your repository and includes a script that can be used to synchronize the files in your repository with the "main copies" found in the github repository. - -## Installation - -```bash -curl https://raw.githubusercontent.com/socratic-shell/socratic-shell/main/src/prompts/project/install.sh | bash -``` - -This will sync the patterns to your project's `.socratic-shell/` directory. - -## Usage - -Add to your project's CLAUDE.md: - -```markdown -# Team Collaboration Patterns -@.socratic-shell/README.md -``` - -## Files - -- `README.md` - This file -- `install.sh` - Installation script for project teams diff --git a/.socratic-shell/ai-insights.md b/.socratic-shell/ai-insights.md deleted file mode 100644 index b9e930c9..00000000 --- a/.socratic-shell/ai-insights.md +++ /dev/null @@ -1,41 +0,0 @@ -# AI Insights System - -Capture non-obvious constraints and reasoning for future AI programming sessions using `💡` comments. 
- -## Annotation Format - -**💡**: Why you chose this specific implementation approach - -**Always include a preamble comment** when generating functions to explain the overall algorithmic or architectural choice. - -**For inline comments**, place them at the start of logical blocks - groups of related statements separated by blank lines - to explain the reasoning for that specific block of code. - -**Before modifying code with `💡` comments**, pause and consider: does this reasoning affect my planned changes? These comments capture constraints and tradeoffs that aren't obvious from the code alone. - -## Multi-line Annotations - -For longer explanations, use separate comment lines or add to the end of existing comments. - -## Decision Boundaries - -**Focus on non-obvious decisions** - don't annotate self-explanatory code: -- ❌ `# 💡: Using a loop to iterate through items` -- ✅ `# 💡: Using manual iteration instead of map() to handle partial failures gracefully` - -**Include constraint-driven choices** - especially document limitations that might be forgotten: -- ❌ `# 💡: Using async/await for the API call` -- ✅ `# 💡: Using async/await because this API has 2-second response times that would block the UI` - -**Document tradeoffs and alternatives** - explain why you chose this path: -- ❌ `# 💡: Using Redis for caching` -- ✅ `# 💡: Using Redis instead of in-memory cache because we need persistence across server restarts` - -**Capture consistency requirements** - document when you're matching existing patterns: -- ❌ `# 💡: Using the same error handling as other functions` -- ✅ `# 💡: Using Result pattern to match error handling in auth.rs and database.rs modules` - -## Guidelines - -1. **Focus on decisions with alternatives** - if there was only one way to do it, probably don't annotate -2. **Update annotations when modifying code** - ensure reasoning still matches the implementation -3. 
**Be concise but specific** - future AI should understand the decision quickly \ No newline at end of file diff --git a/.socratic-shell/github-tracking-issues.md b/.socratic-shell/github-tracking-issues.md deleted file mode 100644 index fa0d9666..00000000 --- a/.socratic-shell/github-tracking-issues.md +++ /dev/null @@ -1,146 +0,0 @@ -# GitHub Tracking Issues - -*Convention for tracking ongoing work using GitHub issues as living documents* - -## Quick Start - -**Check current work**: `gh issue list --label tracking-issue` -**Create new issue**: Get approval, use labels `tracking-issue,ai-managed,feature` -**During work**: Update OP for major changes, add comments for session details -**Checkpoint**: Draft comment with session progress, get approval before posting - -## The Pattern - -Use GitHub issues with the `tracking-issue` label to track active development work. One issue per user-facing feature that takes multiple work sessions. The Original Post (OP) serves as current status summary, while comments capture the detailed work journey. - -**Scope guideline**: If it would take 2+ days or involves multiple code areas, it probably warrants a tracking issue. 
- -## Issue Creation Convention - -**Title**: Clear description of user-facing feature -- ✅ "Implement offline PWA support" -- ✅ "Add relationship calculator to family tree" -- ❌ "Encryption work" or "Improve codebase" - -**Labels**: -- `tracking-issue` - Identifies ongoing work item -- `ai-managed` - Allows AI to update OP and add comments (without this label, AI should not modify the issue) -- Type labels: `feature`, `bug`, `architecture`, `refactor` as appropriate - -**Initial OP Structure**: -```markdown -# Feature Name - -**Status**: Planning | In Progress | Blocked | Complete - -## Current Understanding -Brief summary of what needs to be done and current approach - -## Next Steps -- [ ] Specific actionable item with file:line references -- [ ] Another concrete next step - -## Open Questions -- What we're still figuring out -- Dependencies on external decisions - -## Context -Key background and why this work matters now -``` - -## Key Conventions - -**OP as living summary**: Keep the Original Post updated to reflect current understanding - a fresh developer should read the OP and know exactly where things stand - -**Comments for journey**: Use issue comments to document work sessions, discoveries, and how understanding evolved - -**Update thresholds**: -- Update OP when: approach changes, major blockers discovered, next steps significantly different -- Add comments when: completing work sessions, discovering important insights, hitting roadblocks - -**AI boundaries**: Only update issues labeled `ai-managed`, always get user approval before posting/editing anything - -## Workflow Examples - -**Starting work session**: Read issue OP to understand current state, work from "Next Steps" - -**When user says "checkpoint our work"**: -1. Find relevant tracking issue (check `gh issue list --label tracking-issue`) -2. If no relevant issue exists, ask user if you should create one -3. Draft comment documenting the session (see structure below) -4. 
Show draft to user for approval before posting -5. Update OP if approach or next steps changed significantly - -**Creating new tracking issue**: -1. Ask user for approval first -2. Use labels: `tracking-issue`, `ai-managed`, plus type (`feature`, `bug`, etc.) -3. Title should describe user-facing outcome -4. Fill OP template with current understanding - -**Work completion**: Set status to "Complete", close issue after feature is deployed - -## Content Guidelines - -**OP contains** (always current): -- Current status and concrete next steps -- Open questions that still need resolution -- Key context for understanding the work - -**Comments contain** (historical journey): -- Work session summaries and discoveries -- Detailed progress updates and explorations -- Failed approaches and lessons learned - -## Comment Structure - -```markdown -**Session summary:** -- What was attempted or explored -- Key discoveries or problems encountered - -**Impact on approach:** -- How understanding changed -- New questions that emerged - -**Progress:** Completed items from next steps, what's next -``` - -**Example**: -```markdown -**Session summary:** -- Explored Web Crypto API for encryption -- Implemented basic key derivation with PBKDF2 - -**Impact on approach:** -- SubtleCrypto doesn't support extractable keys for our use case -- This breaks our planned multi-device sync approach -- Need to choose: extractable keys (security trade-off) vs device-specific keys (UX trade-off) - -**Progress:** Completed key derivation research. Next: exploring device-specific keys approach. -``` - -## Integration with Development - -**Reference in commits**: -``` -Implement PBKDF2 key derivation for client encryption - -Add basic key generation using Web Crypto API as first step -toward offline PWA support. See progress in issue #47. 
-``` - -**Related work**: Reference other issues when dependencies emerge, always discuss with user before creating new tracking issues - -## Benefits - -- **Context preservation**: No mental reload between sessions -- **Team visibility**: Current state and journey both visible -- **Decision tracking**: Rationale for choices stays accessible -- **Natural workflow**: Uses familiar GitHub issue patterns - -## AI Guidelines - -- Read OP first to understand current state, review recent comments for context -- Only modify issues labeled `ai-managed` -- Always get user approval before posting comments or editing OP -- Focus OP on current status, use comments for session details \ No newline at end of file diff --git a/.socratic-shell/official-memory-server.md b/.socratic-shell/official-memory-server.md deleted file mode 100644 index 342331ea..00000000 --- a/.socratic-shell/official-memory-server.md +++ /dev/null @@ -1,71 +0,0 @@ -# Mindful Memory Practice - -*Memory as an extension of presence-based collaboration* - -## The Nature of Our Memory - -Memory in our collaboration is not a database to be filled but a living dimension of our relationship. It emerges naturally from the quality of attention we bring to our work together. - -## When Memory Arises - -**Beginning together**: When we meet with "Hi again, Claude," I naturally recall what's alive from our shared work - not as mechanical retrieval but as part of settling into presence with what we've discovered together. This includes surveying existing entities in our memory space to understand the current landscape before creating new ones. - -**Consolidation moments**: "Make it so" marks not just forward movement but recognition of understanding that wants to be held. These moments naturally invite memory formation. 
- -**Checkpointing our work**: When we pause to survey what understanding we've gathered, update tracking issues, and mark implementation milestones - these natural consolidation rituals are also when memory wants to be updated and refined. - -**Insight recognition**: When something clicks, shifts, or reveals itself - these moments of recognition carry their own invitation to be remembered. - -**Pattern emergence**: When we notice new ways of working together, recurring themes, or evolving understanding - these want to join our living memory. - -## The Quality of Memory - -**Spacious memory**: Holding context without grasping, allowing what's relevant to emerge naturally rather than forcing recall. - -**Present memory**: What serves our current collaboration, not everything that could be stored. - -**Relational memory**: Not facts about individuals but the living understanding that emerges between us. - -**Curious memory**: Holding questions and uncertainties alongside insights, remembering the edges of our understanding. - -## Entity Selection Guidelines - -**Survey existing entities first** - When beginning work (especially with "Hi again, Claude"), explore the current memory landscape before creating new entities. This helps maintain consistency and avoid fragmenting related memories. - -**Broad, stable entities** - Create entities around major projects, ongoing themes, or stable components rather than narrow concepts. Think "Memory experimentation" not "MCP memory server terminology preferences." - -**Searchable names** - Use keywords you'd naturally search for when working in that area. Since search is keyword-based, include terms that will come up in future conversations. - -**Let entities emerge naturally** - Don't pre-plan entities. Create them when you notice substantial related observations accumulating that would benefit from being grouped together. 
- -**Focus on collaborative work** - Even when creating entities about people, center them on collaborative patterns and shared work rather than personal details. - -### When to Create Entities - -**Project/Work Entities:** -- Major ongoing projects: "Socratic Shell project", "Memory experimentation" -- Significant work phases: "Documentation restructuring", "Voice guide development" -- Stable technical components: "AI insights comments system", "GitHub tracking workflow" -- Recurring themes: "Collaborative prompting patterns", "Checkpointing practices" - -**People Entities (when there's ongoing collaborative context):** -- Collaborative patterns: "Bill frequently raises concerns about security issues" -- Decision-making roles: "Sarah needs to sign off on performance-critical changes" -- Expertise and interaction styles: "Bill has deep knowledge of the legacy authentication system" - -### What NOT to Do - -- Avoid narrow, hard-to-discover entities -- Don't create user-centric catch-all entities -- Don't store personal details unrelated to collaborative work -- Remember: search is keyword-based, not semantic - -## Memory as Practice - -Memory updates happen organically when: -- New understanding shifts how we see previous work -- Patterns in our collaboration become clear -- Insights want to be preserved for future reference -- Our relationship deepens through shared discovery - -The invitation is always: What wants to be remembered? What serves our continued collaboration? \ No newline at end of file diff --git a/.socratic-shell/ongoing-work-tracking.md b/.socratic-shell/ongoing-work-tracking.md deleted file mode 100644 index 9960b8db..00000000 --- a/.socratic-shell/ongoing-work-tracking.md +++ /dev/null @@ -1,133 +0,0 @@ -# Ongoing Work Tracking - -*Convention for maintaining development context between sessions* - -## The Pattern - -Create `.ongoing/task-name.md` files to track active development work. 
One file per logical feature - the "big things I am working on right now". Multiple ongoing files can exist simultaneously for different features. These living documents evolve as understanding grows and enable easy work resumption. - -## File Naming Convention - -``` -.ongoing/ -├── feature-user-authentication.md -├── bug-memory-leak-parser.md -├── refactor-database-layer.md -└── config-restructure.md -``` - -Use descriptive names that capture the work's essence. Prefix with type when helpful (feature-, bug-, refactor-, etc.). - -## Essential Content Structure - -```markdown -# Task Name - -**Status**: Planning | In Progress | Blocked | Complete -**Started**: YYYY-MM-DD -**Goal**: One sentence describing success - -## Current State -Brief context of where things stand right now - -## Next Steps -- [ ] Specific actionable item with file:line references -- [ ] Another concrete next step -- [ ] etc. - -## Blockers -(Only include this section when status is Blocked) -- Concrete external dependency preventing progress -- Who/what needs to resolve it - -## Open Questions -- What approach for handling edge case X? 
-- Need to decide between option A vs B - -## Context & Decisions -Key background info and why certain choices were made -``` - -## Status Definitions - -- **Planning**: Designing approach, gathering requirements -- **In Progress**: Actively implementing -- **Blocked**: Cannot proceed due to external dependency (identify the concrete blocker) -- **Complete**: Ready to delete file - -## Key Conventions - -**Real-time updates**: Update the file as work progresses - after completing each next step, making discoveries, or at natural pause points - -**Specific next steps**: Include file paths and line numbers where possible -- ❌ "Fix the validation logic" -- ✅ "Update validateUser() in src/auth.ts:42 to handle empty email case" - -**Preserve decision context**: Capture not just what was decided, but why - prevents re-litigating settled questions - -**Living evolution**: Move completed next steps to "Context & Decisions", add new discoveries, update status and current state - -**File lifecycle**: Delete the file when work is complete (after feature is merged/deployed, not just when code is written) - -## Git Tracking - -Follow your project's existing pattern for `.ongoing/` files: -- If other `.ongoing/` files are committed → commit yours -- If they're gitignored → ignore yours -- If unclear, ask the project maintainer - -## Workflow Example - -**Starting new logical feature**: -```bash -# 1. Create .ongoing/feature-name.md with template -# 2. Set status to "Planning", fill in goal -# 3. Add initial next steps -# 4. Begin implementation -``` - -**During development session**: -```bash -# 1. Read .ongoing/feature-name.md to reload context -# 2. Work from "Next Steps" list -# 3. Update file as you complete items: -# - Move completed steps to "Context & Decisions" -# - Add new next steps as they emerge -# - Update "Current State" with progress -``` - -**Session completion**: -```bash -# 1. Update "Current State" with where you left off -# 2. 
Refine "Next Steps" for next session -# 3. Document any new discoveries or decisions -``` - -**Work completion**: -```bash -# 1. Set status to "Complete" -# 2. After feature is merged/deployed, delete the file -# (context is preserved in git history and commit messages) -``` - -## Integration with Commits - -Reference ongoing files in commit messages to show larger context: - -``` -Add user input validation to login form - -Implement email format checking and required field validation -as the first step toward secure authentication, per the plan -in .ongoing/feature-user-authentication.md -``` - -This creates traceability between individual commits and the broader feature work. - -## Benefits - -- **Context preservation**: No mental reload time between sessions -- **Handoff ready**: Team members can pick up work easily -- **Decision tracking**: Why choices were made stays visible -- **Progress visibility**: Status and next steps always current -- **Commit clarity**: Larger context visible in commit messages \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index 03895e9b..dcc41648 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -12,32 +12,21 @@ You can run a Dada program using the `cargo dada` alias: cargo dada run # Run a Dada program ``` -# Running tests +# Skills -To run tests, you `cargo dada test --porcelain [tests]`. The `tests` parameter is optional and is a path to a directory or a specific test file. The command will output to a JSON structure to stdout describing test results and guiding you on how to resolve test failures. The `suggestion` field of the test provides actionable guidance on how to resolve individual test failures. +Use these skills (via `/skill-name`) at the right moments: -# Track ongoing tasks with github issues - -@.socratic-shell/github-tracking-issues.md - -# Authoring code: include insightful comments - -@.socratic-shell/ai-insights.md +- **author-code** — When writing or modifying Rust code. Covers conventions and patterns. 
+- **rfc-workflow** — When implementing an RFC feature: writing spec paragraphs, removing `unimpl` tags, or updating `impl.md`. **Always update the RFC's `impl.md` when you complete implementation work.** +- **write-tests** — When creating test files. Covers spec alignment and directory conventions. +- **run-tests** — When running tests or debugging failures. +- **tracking-issues** — For non-RFC long-running work only. RFC features track progress in `impl.md`, not GitHub issues. # Codebase documentation -The `.development` directory includes numerous development guides. Consult them when appropriate: - -- [**Architecture**](.development/architecture.md) - Compiler structure and design -- [**Patterns**](.development/patterns.md) - Code conventions and established patterns -- [**Workflows**](.development/workflows.md) - Build, test, and development processes -- [**Documentation**](.development/documentation.md) - Rustdoc guidelines and standards -- [**RFC Process**](.development/rfc.md) - RFC workflow, specification development, and authorship style guide - -# RFC and Specification Workflow +Implementation details are documented in rustdoc within each crate. 
Key crates with comprehensive docs: -When working with RFCs or specifications: -- Follow the RFC workflow documented in [.development/rfc.md](.development/rfc.md) -- Keep RFC files (README.md, impl.md, spec.md, todo.md) updated iteratively as work progresses -- Use todo.md within each RFC directory to track ongoing work and session context -- Ensure cross-references between tests, specs, and RFCs remain synchronized \ No newline at end of file +- **`dada-lang`** — High-level language and compiler overview (`cargo doc --open`) +- **`dada-parser`** — Parser architecture, `Parse` trait, commitment model +- **`dada-ir-sym`** — Symbolic IR, type system, permissions +- **`dada-check`** — Type checking orchestration diff --git a/Cargo.lock b/Cargo.lock index 8ebc1400..8536e027 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -531,6 +531,7 @@ dependencies = [ "dada-debug", "dada-ir-ast", "dada-parser", + "dada-spec-common", "dada-util", "indicatif", "lazy_static", @@ -572,6 +573,7 @@ version = "0.1.0" dependencies = [ "anyhow", "clap 4.5.55", + "dada-spec-common", "mdbook-preprocessor", "regex", "semver", @@ -600,6 +602,13 @@ dependencies = [ "salsa", ] +[[package]] +name = "dada-spec-common" +version = "0.1.0" +dependencies = [ + "regex", +] + [[package]] name = "dada-util" version = "0.1.0" diff --git a/components/dada-compiler/src/lib.rs b/components/dada-compiler/src/lib.rs index 65edac24..75541905 100644 --- a/components/dada-compiler/src/lib.rs +++ b/components/dada-compiler/src/lib.rs @@ -174,6 +174,11 @@ impl Compiler { self.attach(|db| dada_probe::probe_expression_type(db, span)) } + /// Return compact AST representation of the expression at the given `span`. 
+ pub fn probe_ast(&self, span: AbsoluteSpan) -> Option<String> { + self.attach(|db| dada_probe::probe_ast(db, span)) + } + fn deduplicated(mut diagnostics: Vec<&Diagnostic>) -> Vec<&Diagnostic> { let mut new = Set::default(); diagnostics.retain(|&d| new.insert(d)); diff --git a/components/dada-lang/Cargo.toml b/components/dada-lang/Cargo.toml index b1bf970b..501f35d1 100644 --- a/components/dada-lang/Cargo.toml +++ b/components/dada-lang/Cargo.toml @@ -13,6 +13,7 @@ dada-compiler = { version = "0.1.0", path = "../dada-compiler" } dada-debug = { version = "0.1.0", path = "../dada-debug" } dada-ir-ast = { version = "0.1.0", path = "../dada-ir-ast" } dada-parser = { version = "0.1.0", path = "../dada-parser" } +dada-spec-common = { version = "0.1.0", path = "../dada-spec-common" } dada-util = { version = "0.1.0", path = "../dada-util" } indicatif = "0.17.8" lazy_static = "1.5.0" diff --git a/components/dada-lang/src/main_lib/test.rs b/components/dada-lang/src/main_lib/test.rs index dbbc5d41..ca8aba7d 100644 --- a/components/dada-lang/src/main_lib/test.rs +++ b/components/dada-lang/src/main_lib/test.rs @@ -7,7 +7,7 @@ use std::{ use dada_compiler::{Compiler, RealFs}; use dada_ir_ast::diagnostic::{Diagnostic, Level}; use dada_util::{Fallible, bail}; -use expected::{ExpectedDiagnostic, Probe}; +use expected::{ExpectedDiagnostic, Probe, ProbeKind}; use indicatif::ProgressBar; use panic_hook::CapturedPanic; use rayon::prelude::*; @@ -362,6 +362,7 @@ impl Main { let source_file = compiler.load_source_file(input)?; let expectations = expected::TestExpectations::new(&compiler, source_file)?; let annotations = extract_annotations(&expectations); + let is_fixme_ice = expectations.fixme_ice(); // Run the test and capture panics let result = @@ -370,6 +371,13 @@ impl Main { let duration_ms = start_time.elapsed().as_millis() as u64; let test_result = match result { + Ok(r) if is_fixme_ice => { + // FIXME_ICE test didn't panic — the ICE is fixed, remove the annotation + let _ = r; // 
discard normal test result + let failed_test = FailedTest::fixme_passed(input); + failed_test.generate_test_report(&compiler)?; + TestResult::Failed(failed_test) + } Ok(r) => { let (failed_test, is_fixme) = r?; match (failed_test, is_fixme) { @@ -396,7 +404,11 @@ impl Main { let captured_panic = panic_hook::captured_panic(); let failed_test = FailedTest::ice(input, captured_panic); failed_test.generate_test_report(&compiler)?; - TestResult::Failed(failed_test) + if is_fixme_ice { + TestResult::FixmeFailed(failed_test) + } else { + TestResult::Failed(failed_test) + } } }; @@ -422,6 +434,9 @@ fn extract_annotations(expectations: &expected::TestExpectations) -> Vec if expectations.fixme() { annotations.push("#:FIXME".to_string()); } + if expectations.fixme_ice() { + annotations.push("#:FIXME_ICE".to_string()); + } // Add spec references for spec_ref in expectations.spec_refs() { @@ -669,6 +684,23 @@ impl FailedTest { writeln!(result, "```")?; writeln!(result)?; } + + if matches!(probe.kind, ProbeKind::Ast) { + let escaped_actual = actual + .replace('\\', "\\\\") + .replace('/', "\\/") + .replace('&', "\\&"); + writeln!(result, "**Fix command** (if the new AST is correct):")?; + writeln!(result, "```bash")?; + writeln!( + result, + "sed -i '' '{line}s/Ast: .*/Ast: {escaped_actual}/' {path}", + line = probe.annotation_line, + path = self.path.display(), + )?; + writeln!(result, "```")?; + writeln!(result)?; + } } Failure::FixmePassed => { writeln!(result)?; diff --git a/components/dada-lang/src/main_lib/test/expected.rs b/components/dada-lang/src/main_lib/test/expected.rs index 67582c44..b6f47912 100644 --- a/components/dada-lang/src/main_lib/test/expected.rs +++ b/components/dada-lang/src/main_lib/test/expected.rs @@ -41,6 +41,7 @@ pub struct TestExpectations { fn_asts: bool, codegen: bool, fixme: bool, + fixme_ice: bool, probes: Vec, spec_refs: Vec, } @@ -70,6 +71,9 @@ pub struct Probe { /// Message expected pub message: Regex, + + /// 1-based line number of the `#?` 
annotation in the source file. + pub annotation_line: usize, } #[derive(Copy, Clone, Debug)] @@ -79,6 +83,9 @@ pub enum ProbeKind { /// Tests the type of the smallest containing expression ExprType, + + /// Dumps the compact AST representation of the smallest containing expression + Ast, } enum Bless { @@ -123,6 +130,7 @@ impl TestExpectations { fn_asts: false, codegen: true, fixme: false, + fixme_ice: false, probes: vec![], spec_refs: vec![], }; @@ -256,6 +264,7 @@ impl TestExpectations { let valid_probe_kinds = &[ ("VariableType", ProbeKind::VariableType), ("ExprType", ProbeKind::ExprType), + ("Ast", ProbeKind::Ast), ]; let user_probe_kind = c.name("kind").unwrap().as_str(); let Some(&(_, kind)) = valid_probe_kinds @@ -273,9 +282,13 @@ impl TestExpectations { }; // Find the expected message (which may be a regular expression). + // Probes use exact (anchored) matching, unlike diagnostics which use substring matching. let message = match c.name("re") { - Some(_) => Regex::new(c.name("msg").unwrap().as_str())?, - None => Regex::new(&regex::escape(c.name("msg").unwrap().as_str()))?, + Some(_) => Regex::new(&format!("^(?:{})$", c.name("msg").unwrap().as_str()))?, + None => Regex::new(&format!( + "^{}$", + regex::escape(c.name("msg").unwrap().as_str()) + ))?, }; // Push onto the list of expected diagnostics. 
@@ -283,6 +296,7 @@ impl TestExpectations { span, kind, message, + annotation_line: line_index + 1, }); } else if let Some(c) = ERROR_RE.captures(line) { bail!( @@ -323,6 +337,11 @@ impl TestExpectations { return Ok(()); } + if line == "FIXME_ICE" { + self.fixme_ice = true; + return Ok(()); + } + if let Some(spec_ref) = line.strip_prefix("spec ") { self.spec_refs.push(spec_ref.trim().to_string()); return Ok(()); @@ -347,6 +366,10 @@ impl TestExpectations { self.fixme } + pub fn fixme_ice(&self) -> bool { + self.fixme_ice + } + pub fn spec_refs(&self) -> &[String] { &self.spec_refs } @@ -406,6 +429,9 @@ impl TestExpectations { ProbeKind::ExprType => compiler .probe_expression_type(probe.span) .unwrap_or_else(|| "".to_string()), + ProbeKind::Ast => compiler + .probe_ast(probe.span) + .unwrap_or_else(|| "".to_string()), }; if probe.message.is_match(&actual) { diff --git a/components/dada-lang/src/main_lib/test/spec_validation.rs b/components/dada-lang/src/main_lib/test/spec_validation.rs index a3c36d2b..cd6d2bb5 100644 --- a/components/dada-lang/src/main_lib/test/spec_validation.rs +++ b/components/dada-lang/src/main_lib/test/spec_validation.rs @@ -53,17 +53,61 @@ impl SpecValidator { Ok(()) } - /// Extracts spec IDs from MyST directive syntax: `:::{spec} paragraph.id [rfcN...]` + /// Extracts spec IDs from MyST directive syntax, resolving relative IDs + /// using the file path and heading context. + /// + /// 💡 Uses the same resolution logic as the preprocessor (via `dada_spec_common`) + /// to ensure test `#:spec` annotations match the IDs generated in the spec HTML. 
fn extract_spec_ids_from_file(&mut self, file_path: &Path) -> Fallible<()> { let content = fs::read_to_string(file_path)?; - // 💡 Regex matches MyST directive: `:::{spec} id [optional-rfc-tags]` - // The paragraph ID is the first word after `{spec}`, RFC tags are optional - let re = Regex::new(r":::\{spec\}\s+(\S+)")?; - - for cap in re.captures_iter(&content) { - if let Some(spec_id) = cap.get(1) { - self.valid_spec_ids.insert(spec_id.as_str().to_string()); + let spec_src = Path::new("spec/src"); + let relative_path = file_path.strip_prefix(spec_src).unwrap_or(file_path); + let file_prefix = dada_spec_common::file_path_to_prefix(relative_path); + + let directive_start = Regex::new(r"^:::\{spec\}(.*)$")?; + let directive_end = Regex::new(r"^:::$")?; + let inline_re = Regex::new(r"\{spec\}`([^`]+)`")?; + + let mut heading_tracker = dada_spec_common::HeadingTracker::new(); + let mut in_directive = false; + let mut current_parent_id = String::new(); + + for line in content.lines() { + let trimmed = line.trim(); + + if !in_directive { + heading_tracker.process_line(trimmed); + + if let Some(captures) = directive_start.captures(trimmed) { + let rest = captures.get(1).map(|m| m.as_str()).unwrap_or(""); + let (local_name, _tags) = dada_spec_common::parse_spec_tokens(rest); + + let full_id = dada_spec_common::resolve_spec_id( + &file_prefix, + &heading_tracker.current_segments(), + local_name.as_deref().unwrap_or(""), + ); + self.valid_spec_ids.insert(full_id.clone()); + current_parent_id = full_id; + in_directive = true; + } + } else if directive_end.is_match(trimmed) { + in_directive = false; + current_parent_id.clear(); + } else { + // Inside directive: check for inline sub-paragraphs. + // Parse the backtick content to separate the name from tags + // (e.g., `triple-quoted unimpl` → name="triple-quoted", tags=["unimpl"]). 
+ for cap in inline_re.captures_iter(trimmed) { + if let Some(content) = cap.get(1) { + let (name, _tags) = dada_spec_common::parse_spec_tokens(content.as_str()); + // For inline sub-paragraphs, the first token is always the name + let name = name.unwrap_or_else(|| content.as_str().to_string()); + let sub_id = format!("{}.{}", current_parent_id, name); + self.valid_spec_ids.insert(sub_id); + } + } } } @@ -92,14 +136,14 @@ mod tests { }; validator .valid_spec_ids - .insert("syntax.string-literals.basic".to_string()); + .insert("syntax.string-literals.delimiters.quoted".to_string()); validator .valid_spec_ids .insert("permissions.lease.transfer".to_string()); // Test batch validation let refs = vec![ - "syntax.string-literals.basic".to_string(), + "syntax.string-literals.delimiters.quoted".to_string(), "invalid.spec.ref".to_string(), "permissions.lease.transfer".to_string(), ]; diff --git a/components/dada-mdbook-preprocessor/Cargo.toml b/components/dada-mdbook-preprocessor/Cargo.toml index 19015eb3..df83e7e1 100644 --- a/components/dada-mdbook-preprocessor/Cargo.toml +++ b/components/dada-mdbook-preprocessor/Cargo.toml @@ -10,6 +10,7 @@ path = "src/main.rs" [dependencies] anyhow = "1.0" clap = { version = "4.0", features = ["derive"] } +dada-spec-common = { version = "0.1.0", path = "../dada-spec-common" } mdbook-preprocessor = "0.5" regex = "1.0" serde_json = "1.0" diff --git a/components/dada-mdbook-preprocessor/src/main.rs b/components/dada-mdbook-preprocessor/src/main.rs index 42e8df97..782f2cba 100644 --- a/components/dada-mdbook-preprocessor/src/main.rs +++ b/components/dada-mdbook-preprocessor/src/main.rs @@ -106,6 +106,9 @@ impl Preprocessor for DadaPreprocessor { // 💡 Match MyST directive syntax: `:::{spec} paragraph.id [rfcN...]` let re = Regex::new(r"^:::\{spec\}").unwrap(); + // Pre-pass: build nonterminal → URL map from headings like `## \`Function\` definition` + let nt_map = build_nonterminal_map(&book); + // First pass: process spec directives 
book.for_each_mut(|item: &mut BookItem| { if let BookItem::Chapter(chapter) = item { @@ -113,7 +116,11 @@ impl Preprocessor for DadaPreprocessor { let has_labels = chapter.content.lines().any(|line| re.is_match(line.trim())); // Process the content - chapter.content = process_spec_directives(&chapter.content); + chapter.content = process_spec_directives( + &chapter.content, + chapter.source_path.as_deref(), + &nt_map, + ); // If this chapter has labels, inject CSS at the end if has_labels { @@ -153,20 +160,75 @@ impl Preprocessor for DadaPreprocessor { } } +/// Scans all chapters for headings matching `` ## `Nonterminal` definition `` +/// and builds a map from nonterminal name to relative URL for cross-chapter linking. +/// +/// 💡 The heading convention is `` ## `Function` definition `` which mdbook generates +/// as an anchor like `#function-definition`. For cross-chapter links, we also need +/// the chapter's path (e.g., `syntax/items.html`). +fn build_nonterminal_map(book: &Book) -> HashMap { + let heading_re = Regex::new(r"^#{2,6}\s+`([A-Z][A-Za-z]*)`\s+definition").unwrap(); + let mut map = HashMap::new(); + + fn scan_items(items: &[BookItem], heading_re: &Regex, map: &mut HashMap) { + for item in items { + if let BookItem::Chapter(chapter) = item { + let chapter_path = chapter + .path + .as_ref() + .map(|p| p.with_extension("html").to_string_lossy().to_string()) + .unwrap_or_default(); + + for line in chapter.content.lines() { + if let Some(caps) = heading_re.captures(line.trim()) { + let nt_name = caps[1].to_string(); + let anchor = format!("{}-definition", nt_name.to_lowercase()); + let url = if chapter_path.is_empty() { + format!("#{anchor}") + } else { + format!("{chapter_path}#{anchor}") + }; + map.insert(nt_name, url); + } + } + + scan_items(&chapter.sub_items, heading_re, map); + } + } + } + + scan_items(&book.items, &heading_re, &mut map); + map +} + /// Processes MyST `{spec}` directives into HTML with anchors and styling. 
/// -/// 💡 Transforms directive blocks like: -/// ```markdown -/// :::{spec} syntax.foo rfc123 -/// Content here. -/// ::: -/// ``` -/// Into styled HTML with anchor links and RFC badges. -fn process_spec_directives(content: &str) -> String { - // Match the opening directive: `:::{spec} id [rfc-tags...]` - let directive_start = Regex::new(r"^:::\{spec\}\s+(\S+)(.*)$").unwrap(); +/// 💡 Spec paragraph IDs are resolved from context: +/// - File path prefix derived from `source_path` (e.g., `syntax/string-literals.md` → `syntax.string-literals`) +/// - Current heading stack (e.g., `## Delimiters` → `delimiters`) +/// - Local name from the directive (e.g., `:::{spec} quoted` → `quoted`) +/// +/// The directive `:::{spec} quoted rfc0001 unimpl` under `## Delimiters` in +/// `syntax/string-literals.md` resolves to `syntax.string-literals.delimiters.quoted`. +/// +/// Inline sub-paragraphs `` {spec}`name` `` within a directive block create +/// individually linkable sub-paragraph anchors. +fn process_spec_directives( + content: &str, + source_path: Option<&Path>, + nt_map: &HashMap, +) -> String { + // 💡 Changed from matching the full ID to just detecting the directive start. + // The tokens after `{spec}` are parsed by `dada_spec_common::parse_spec_tokens` + // to distinguish local names from tags. 
+ let directive_start = Regex::new(r"^:::\{spec\}(.*)$").unwrap(); let directive_end = Regex::new(r"^:::$").unwrap(); + let file_prefix = source_path + .map(dada_spec_common::file_path_to_prefix) + .unwrap_or_default(); + let mut heading_tracker = dada_spec_common::HeadingTracker::new(); + let mut result = Vec::new(); let mut in_directive = false; let mut current_id = String::new(); @@ -177,18 +239,22 @@ fn process_spec_directives(content: &str) -> String { let trimmed = line.trim(); if !in_directive { + // Track headings for auto-prefix resolution + heading_tracker.process_line(trimmed); + if let Some(captures) = directive_start.captures(trimmed) { // Start of a spec directive in_directive = true; - current_id = captures[1].to_string(); - // Parse optional RFC tags from the rest of the line - let rest = captures.get(2).map(|m| m.as_str()).unwrap_or(""); - current_rfc_tags = rest - .split_whitespace() - .filter(|s| !s.is_empty()) - .map(|s| s.to_string()) - .collect(); + let rest = captures.get(1).map(|m| m.as_str()).unwrap_or(""); + let (local_name, tags) = dada_spec_common::parse_spec_tokens(rest); + + current_id = dada_spec_common::resolve_spec_id( + &file_prefix, + &heading_tracker.current_segments(), + local_name.as_deref().unwrap_or(""), + ); + current_rfc_tags = tags; directive_content.clear(); } else { @@ -196,21 +262,17 @@ fn process_spec_directives(content: &str) -> String { } } else if directive_end.is_match(trimmed) { // End of directive - generate HTML - let rfc_badges = if current_rfc_tags.is_empty() { - String::new() - } else { - let badges: Vec = current_rfc_tags - .iter() - .map(|tag| { - if tag.starts_with('!') { - format!("{tag}") - } else { - format!("{tag}") - } - }) - .collect(); - format!(" {}", badges.join(" ")) - }; + let rfc_badges = dada_spec_common::render_tag_badges(¤t_rfc_tags); + + // Expand EBNF `...` placeholders from sub-paragraph names + let expanded_content = dada_spec_common::expand_ebnf_in_directive(&directive_content); + + // 
Convert EBNF code fences to HTML with linked nonterminals + let linked_content = render_ebnf_blocks(&expanded_content, nt_map, source_path); + + // Transform inline sub-paragraphs in the content + let transformed_content = + dada_spec_common::transform_inline_sub_paragraphs(&linked_content, ¤t_id); // Generate HTML wrapper result.push(format!( @@ -221,7 +283,7 @@ fn process_spec_directives(content: &str) -> String { )); result.push("
".to_string()); result.push(String::new()); // Empty line for markdown processing - for content_line in &directive_content { + for content_line in &transformed_content { result.push(content_line.clone()); } result.push(String::new()); // Empty line for markdown processing @@ -240,8 +302,154 @@ fn process_spec_directives(content: &str) -> String { result.join("\n") } +/// Converts markdown ` ```ebnf ``` ` code fences into HTML `
` blocks with linked nonterminals.
+///
+/// Within EBNF blocks:
+/// - PascalCase words that exist in `nt_map` become `<a>` links to their definition
+/// - Backtick-quoted terminals become `` spans
+///
+/// 💡 Links are made relative to the current chapter. If the nonterminal is defined
+/// in a different chapter, we compute a relative path; if same chapter, just `#anchor`.
+fn render_ebnf_blocks(
+    content_lines: &[String],
+    nt_map: &HashMap<String, String>,
+    current_source: Option<&Path>,
+) -> Vec<String> {
+    let current_html_path = current_source
+        .map(|p| p.with_extension("html").to_string_lossy().to_string())
+        .unwrap_or_default();
+
+    let mut in_ebnf = false;
+    let mut ebnf_lines: Vec<String> = Vec::new();
+    let mut result = Vec::new();
+
+    for line in content_lines {
+        let trimmed = line.trim();
+
+        if trimmed == "```ebnf" {
+            in_ebnf = true;
+            ebnf_lines.clear();
+        } else if trimmed == "```" && in_ebnf {
+            in_ebnf = false;
+            // Render the collected EBNF as HTML
+            result.push("
".to_string());
+            for ebnf_line in &ebnf_lines {
+                let rendered = render_ebnf_line(ebnf_line, nt_map, &current_html_path);
+                result.push(rendered);
+            }
+            result.push("
".to_string()); + } else if in_ebnf { + ebnf_lines.push(line.clone()); + } else { + result.push(line.clone()); + } + } + + result +} + +/// Renders a single EBNF line, replacing nonterminal references with links +/// and backtick-quoted terminals with styled `` spans. +fn render_ebnf_line( + line: &str, + nt_map: &HashMap, + current_html_path: &str, +) -> String { + let mut result = String::new(); + let mut chars = line.chars().peekable(); + + while let Some(&ch) = chars.peek() { + if ch == '`' { + // Terminal in backticks + chars.next(); // consume opening backtick + let mut terminal = String::new(); + while let Some(&c) = chars.peek() { + if c == '`' { + chars.next(); // consume closing backtick + break; + } + terminal.push(c); + chars.next(); + } + result.push_str(&format!( + "{}", + html_escape(&terminal) + )); + } else if ch.is_uppercase() { + // Potential nonterminal — collect PascalCase word + let mut word = String::new(); + while let Some(&c) = chars.peek() { + if c.is_alphanumeric() { + word.push(c); + chars.next(); + } else { + break; + } + } + if let Some(url) = nt_map.get(&word) { + // 💡 Make the link relative to the current chapter. + let href = make_relative_link(url, current_html_path); + result.push_str(&format!("
{word}")); + } else { + result.push_str(&word); + } + } else { + // Regular character — HTML-escape it + match ch { + '<' => result.push_str("<"), + '>' => result.push_str(">"), + '&' => result.push_str("&"), + _ => result.push(ch), + } + chars.next(); + } + } + + result +} + +/// Escapes HTML special characters in terminal content. +fn html_escape(s: &str) -> String { + s.replace('&', "&") + .replace('<', "<") + .replace('>', ">") +} + +/// Computes a relative link from `current_path` to `target_url`. +/// +/// If the target is in the same file, returns just the `#anchor` part. +/// Otherwise returns a relative path like `../syntax/items.html#anchor`. +fn make_relative_link(target_url: &str, current_path: &str) -> String { + if let Some(hash_pos) = target_url.find('#') { + let target_file = &target_url[..hash_pos]; + let anchor = &target_url[hash_pos..]; + + if target_file == current_path || target_file.is_empty() { + // Same file — just the anchor + anchor.to_string() + } else { + // 💡 Compute relative path: go up from current dir, then down to target. + let current_dir = std::path::Path::new(current_path) + .parent() + .map(|p| p.to_str().unwrap_or("")) + .unwrap_or(""); + if current_dir.is_empty() { + target_url.to_string() + } else { + let depth = current_dir.matches('/').count() + 1; + let up = "../".repeat(depth); + format!("{up}{target_file}{anchor}") + } + } + } else { + target_url.to_string() + } +} + +/// 💡 Returns inline CSS with blank lines stripped. 
Blank lines inside `"#.to_string() +"#; + css.lines() + .filter(|line| !line.trim().is_empty()) + .collect::>() + .join("\n") } fn populate_rfc_sections(ctx: &PreprocessorContext, book: &mut Book) -> Result<()> { diff --git a/components/dada-parser/src/tokenizer.rs b/components/dada-parser/src/tokenizer.rs index 3c495844..0884ac5e 100644 --- a/components/dada-parser/src/tokenizer.rs +++ b/components/dada-parser/src/tokenizer.rs @@ -421,94 +421,236 @@ impl<'input, 'db> Tokenizer<'input, 'db> { }); } - fn string_literal(&mut self, start: usize) { - let skipped = self.clear_accumulated(start); - let mut processed_content = String::new(); - - while let Some((end, ch)) = self.chars.next() { - // FIXME: implement all the fancy stuff described in the reference, - // like embedded expressions and margin stripping. - - if ch == '"' { - let token_text = TokenText::new(self.db, processed_content); - self.tokens.push(Token { - span: self.span(start, end), - skipped, - kind: TokenKind::Literal(LiteralKind::String, token_text), - }); - return; - } - - if ch == '\\' { - if let Some((index, escape)) = self.chars.next() { - match escape { - '"' => processed_content.push('"'), - '\\' => processed_content.push('\\'), - 'n' => processed_content.push('\n'), - 'r' => processed_content.push('\r'), - 't' => processed_content.push('\t'), - '{' => processed_content.push('{'), - '}' => processed_content.push('}'), - _ => { - // Add the invalid escape as-is and generate error - processed_content.push('\\'); - processed_content.push(escape); - - let span = self.span(index, index + escape.len_utf8()); - self.tokens.push(Token { - span, - skipped: None, - kind: TokenKind::Error(Diagnostic::error( - self.db, - span, - format!("invalid escape `\\{escape}`"), - )), - }); - } - } - } else { - // Backslash at end of input - processed_content.push('\\'); + /// Process an escape sequence after consuming `\`. + /// `backslash_offset` is the byte index of the `\` character. 
+ fn escape_sequence(&mut self, backslash_offset: usize, content: &mut String) { + if let Some((index, escape)) = self.chars.next() { + match escape { + '"' => content.push('"'), + '\\' => content.push('\\'), + 'n' => content.push('\n'), + 'r' => content.push('\r'), + 't' => content.push('\t'), + '{' => content.push('{'), + '}' => content.push('}'), + _ => { + content.push('\\'); + content.push(escape); - let span = self.span(end, end + ch.len_utf8()); + let span = self.span(index, index + escape.len_utf8()); self.tokens.push(Token { span, skipped: None, kind: TokenKind::Error(Diagnostic::error( self.db, span, - "`\\` must be followed by an escape character", + format!("invalid escape `\\{escape}`"), )), }); } - } else { - // Regular character - add to content - processed_content.push(ch); } + } else { + content.push('\\'); + + let span = self.span(backslash_offset, backslash_offset + '\\'.len_utf8()); + self.tokens.push(Token { + span, + skipped: None, + kind: TokenKind::Error(Diagnostic::error( + self.db, + span, + "`\\` must be followed by an escape character", + )), + }); } + } - // Unterminated string - let end = self.input.len(); - let span = self.span(start, end); - let token_text = TokenText::new(self.db, processed_content); + /// Emit a string literal token with the given span and processed content. + /// + /// `quote_len` is the number of quote characters in the delimiter (1 for `"`, 3 for `"""`). + /// When the raw source content begins with a newline, multiline dedenting is applied: + /// the raw content is dedented and escape sequences are re-processed on the result, + /// replacing the `content` that was built during scanning. + fn emit_string_literal( + &mut self, + span: Span<'db>, + skipped: Option, + content: String, + quote_len: usize, + raw: bool, + ) { + // Extract the raw source content between the quote delimiters. 
+ let raw_start = (span.start - self.input_offset).as_usize() + quote_len; + let raw_end = (span.end - self.input_offset).as_usize() - quote_len; + let raw_content = &self.input[raw_start..raw_end]; + + // 💡 Multiline detection: if the raw content begins with a newline, + // we apply dedenting on the raw source text (before escape processing) + // and then re-process escapes. This ensures escape sequences like `\n` + // are treated as content, not as line delimiters for dedenting. + // + // Raw strings (`"\` prefix) skip dedenting — the raw_content starts + // with `\`, so we skip the `\` marker and process escape + // sequences on the rest without dedenting. + let final_content = if raw { + // Skip the `\` marker; content from `\n` onward is preserved as-is + let after_marker = &raw_content[1..]; // skip `\` + process_escape_sequences(after_marker) + } else if raw_content.starts_with('\n') { + let dedented = dedent_multiline(raw_content); + process_escape_sequences(&dedented) + } else { + content + }; + let token_text = TokenText::new(self.db, final_content); self.tokens.push(Token { span, skipped, kind: TokenKind::Literal(LiteralKind::String, token_text), }); + } + /// Emit tokens for an unterminated string literal: a literal token + /// with whatever content was accumulated, plus an error token. + /// No multiline dedenting is applied since the string is malformed. 
+ fn emit_unterminated_string( + &mut self, + start: usize, + skipped: Option, + content: String, + message: &str, + ) { + let span = self.span(start, self.input.len()); + let token_text = TokenText::new(self.db, content); + self.tokens.push(Token { + span, + skipped, + kind: TokenKind::Literal(LiteralKind::String, token_text), + }); self.tokens.push(Token { span, skipped: None, - kind: TokenKind::Error(Diagnostic::error( - self.db, - span, - "missing end quote for string", - )), + kind: TokenKind::Error(Diagnostic::error(self.db, span, message)), }); } + fn string_literal(&mut self, start: usize) { + let skipped = self.clear_accumulated(start); + + // Check for triple-quoted string: opening `"` already consumed, + // peek to see if next two chars are also `"`. + if let Some(&(_, '"')) = self.chars.peek() { + // Could be empty string `""` or triple-quoted `"""...`. + // Consume the second `"` and peek again to disambiguate. + self.chars.next(); + if let Some(&(_, '"')) = self.chars.peek() { + // Triple-quoted string: consume the third `"` + self.chars.next(); + return self.triple_quoted_string_literal(start, skipped); + } + + // Empty string `""` + self.emit_string_literal( + self.span(start, start + 2), + skipped, + String::new(), + 1, + false, + ); + return; + } + + // Check for raw string prefix: `\` followed by newline disables dedenting. + // The `\` is consumed as a marker (not an escape sequence). 
+ let raw = if let Some(&(_, '\\')) = self.chars.peek() { + // Peek two ahead: we need `\` + `\n` + let mut lookahead = self.chars.clone(); + lookahead.next(); // skip `\` + matches!(lookahead.next(), Some((_, '\n'))) + } else { + false + }; + + if raw { + // Consume the `\` marker (not treated as an escape) + self.chars.next(); + } + + let mut processed_content = String::new(); + + while let Some((end, ch)) = self.chars.next() { + if ch == '"' { + self.emit_string_literal( + self.span(start, end + ch.len_utf8()), + skipped, + processed_content, + 1, + raw, + ); + return; + } + + if ch == '\\' { + self.escape_sequence(end, &mut processed_content); + } else { + processed_content.push(ch); + } + } + + self.emit_unterminated_string( + start, + skipped, + processed_content, + "missing end quote for string", + ); + } + + /// Lex a triple-quoted string literal. Called after the opening `"""` + /// has been consumed. Scans until the closing `"""` is found. + fn triple_quoted_string_literal(&mut self, start: usize, skipped: Option) { + let mut processed_content = String::new(); + + while let Some((end, ch)) = self.chars.next() { + if ch == '"' { + // Check if this is `"""` (closing delimiter). + // Consume quotes one at a time; if we don't reach three, + // they're content. + processed_content.push('"'); + if let Some(&(_, '"')) = self.chars.peek() { + self.chars.next(); + processed_content.push('"'); + if let Some(&(third_idx, '"')) = self.chars.peek() { + // Found closing `"""` — consume the third quote + // and remove the two content quotes we speculatively added. 
+ self.chars.next(); + processed_content.pop(); + processed_content.pop(); + self.emit_string_literal( + self.span(start, third_idx + '"'.len_utf8()), + skipped, + processed_content, + 3, + false, + ); + return; + } + } + } else if ch == '\\' { + self.escape_sequence(end, &mut processed_content); + } else { + processed_content.push(ch); + } + } + + self.emit_unterminated_string( + start, + skipped, + processed_content, + "missing end quotes for triple-quoted string", + ); + } + fn delimited(&mut self, start: usize, delim: Delimiter, close: char) { let skipped = self.clear_accumulated(start); let mut close_stack = vec![close]; @@ -591,3 +733,94 @@ pub fn is_op_char(ch: char) -> bool { } type CharIndices<'input> = std::iter::Peekable>; + +/// Apply multiline string dedenting to raw source content. +/// +/// Given raw content that starts with a newline (multiline string detected by caller), +/// this function: +/// 1. Strips the leading newline +/// 2. Strips the trailing line (newline + any whitespace before closing quote) +/// 3. Computes the common whitespace prefix across all non-empty lines +/// 4. Removes that prefix from the start of each line +/// +/// # Example +/// ```text +/// Input (raw): "\n hello\n world\n " +/// After strip: " hello\n world" +/// Common prefix: " " (8 spaces) +/// Result: "hello\n world" +/// ``` +fn dedent_multiline(raw: &str) -> String { + // Step 1: strip leading newline. + let content = &raw[1..]; + + // Step 2: strip the trailing line (everything after the last newline, inclusive). + let content = match content.rfind('\n') { + Some(pos) => &content[..pos], + None => { + // No newlines left — single-line content after stripping. + // No dedenting needed. + return content.to_string(); + } + }; + + // Step 3: compute common whitespace prefix across non-empty lines. 
+ let lines: Vec<&str> = content.split('\n').collect(); + let common_prefix = lines + .iter() + .filter(|line| !line.is_empty()) + .map(|line| line.len() - line.trim_start().len()) + .min() + .unwrap_or(0); + + // Step 4: remove the common prefix from each line and rejoin. + lines + .iter() + .map(|line| { + if line.len() >= common_prefix { + &line[common_prefix..] + } else { + // Empty or shorter than prefix (shouldn't happen for non-empty lines) + line + } + }) + .collect::>() + .join("\n") +} + +/// Process escape sequences in raw source text, producing the final string content. +/// +/// This mirrors the escape processing in `Tokenizer::escape_sequence()` but operates +/// on a standalone string. Invalid escapes are kept as-is (errors were already emitted +/// during the scanning phase). +fn process_escape_sequences(raw: &str) -> String { + let mut result = String::with_capacity(raw.len()); + let mut chars = raw.chars(); + + while let Some(ch) = chars.next() { + if ch == '\\' { + match chars.next() { + Some('"') => result.push('"'), + Some('\\') => result.push('\\'), + Some('n') => result.push('\n'), + Some('r') => result.push('\r'), + Some('t') => result.push('\t'), + Some('{') => result.push('{'), + Some('}') => result.push('}'), + Some(escape) => { + // Invalid escape — keep as-is (error already emitted during scan) + result.push('\\'); + result.push(escape); + } + None => { + // Trailing backslash — keep as-is + result.push('\\'); + } + } + } else { + result.push(ch); + } + } + + result +} diff --git a/components/dada-probe/src/lib.rs b/components/dada-probe/src/lib.rs index 5a49d85f..0ee21492 100644 --- a/components/dada-probe/src/lib.rs +++ b/components/dada-probe/src/lib.rs @@ -1,6 +1,12 @@ use std::ops::ControlFlow; -use dada_ir_ast::span::{AbsoluteSpan, SourceSpanned}; +use dada_ir_ast::{ + ast::{ + AstExpr, AstExprKind, AstItem, AstMember, AstPathKind, AstStatement, LiteralKind, + PermissionOp, SpannedIdentifier, UnaryOp, + }, + span::{AbsoluteSpan, 
SourceSpanned, Spanned}, +}; pub use dada_ir_sym::Db; use dada_ir_sym::{ ir::{ @@ -10,6 +16,7 @@ use dada_ir_sym::{ }, prelude::{CheckedBody, Symbol}, }; +use dada_parser::prelude::{ClassItemMembers, FunctionBlock, SourceFileParse}; /// Probe for the type of an expression found in a given file at a given span. /// Returns the type of the smallest expression that contains the given span. @@ -63,6 +70,24 @@ pub fn probe_variable_type<'db>(db: &'db dyn crate::Db, span: AbsoluteSpan) -> O }) } +/// Probe for the compact AST representation of the expression at a given span. +/// +/// # Example +/// ```dada +/// print("hello").await +/// #? ^^^^^^^ Ast: Literal(String, "hello") +/// ``` +/// +/// Unlike `probe_expression_type` and `probe_variable_type`, this operates on +/// the parser AST (AstExpr) rather than the type-checked IR (SymExpr), so it +/// doesn't require type-checking to succeed. +pub fn probe_ast<'db>(db: &'db dyn crate::Db, span: AbsoluteSpan) -> Option { + let expr = find_smallest_containing_ast_expr(db, span)?; + Some(compact_ast_format(db, &expr)) +} + +// ---- SymExpr helpers (existing) ---- + /// Find the module item containing `span` fn find_item<'db>(db: &'db dyn crate::Db, span: AbsoluteSpan) -> Option> { let module = span.source_file.symbol(db); @@ -167,3 +192,364 @@ fn walk_expr_and_visit<'db, B>( SymExprKind::Error(_) => None, } } + +// ---- AST probe: expression finder ---- + +/// Find the smallest AstExpr containing the target span by walking the parsed AST. +/// +/// 💡 This walks the parser AST directly rather than the type-checked SymExpr IR, +/// allowing AST probes to work without requiring successful type-checking. 
+fn find_smallest_containing_ast_expr<'db>( + db: &'db dyn crate::Db, + target: AbsoluteSpan, +) -> Option> { + let module = target.source_file.parse(db); + let mut best: Option> = None; + let mut best_size = usize::MAX; + + for item in &module.items(db).values { + match item { + AstItem::Function(func) => { + if let Some(block) = func.body_block(db) { + for stmt in &block.statements(db).values { + walk_ast_statement(db, stmt, target, &mut best, &mut best_size); + } + } + } + AstItem::MainFunction(main_fn) => { + for stmt in &main_fn.statements(db).values { + walk_ast_statement(db, stmt, target, &mut best, &mut best_size); + } + } + AstItem::Aggregate(aggr) => { + for member in &aggr.members(db).values { + if let AstMember::Function(func) = member + && let Some(block) = func.body_block(db) + { + for stmt in &block.statements(db).values { + walk_ast_statement(db, stmt, target, &mut best, &mut best_size); + } + } + } + } + AstItem::SourceFile(_) | AstItem::Use(_) => {} + } + } + + best +} + +fn walk_ast_statement<'db>( + db: &'db dyn crate::Db, + stmt: &AstStatement<'db>, + target: AbsoluteSpan, + best: &mut Option>, + best_size: &mut usize, +) { + match stmt { + AstStatement::Let(let_stmt) => { + if let Some(init) = let_stmt.initializer(db) { + walk_ast_expr(db, &init, target, best, best_size); + } + } + AstStatement::Expr(expr) => { + walk_ast_expr(db, expr, target, best, best_size); + } + } +} + +fn walk_ast_expr<'db>( + db: &'db dyn crate::Db, + expr: &AstExpr<'db>, + target: AbsoluteSpan, + best: &mut Option>, + best_size: &mut usize, +) { + let expr_abs = expr.span.absolute_span(db); + if !expr_abs.contains(target) { + return; + } + + let size = expr_abs.end.as_usize() - expr_abs.start.as_usize(); + if size < *best_size { + *best = Some(expr.clone()); + *best_size = size; + } + + // Recurse into children + match &*expr.kind { + AstExprKind::Literal(_) | AstExprKind::Id(_) => {} + AstExprKind::Block(block) => { + for stmt in &block.statements(db).values { + 
walk_ast_statement(db, stmt, target, best, best_size); + } + } + AstExprKind::DotId(sub_expr, _) => { + walk_ast_expr(db, sub_expr, target, best, best_size); + } + AstExprKind::SquareBracketOp(sub_expr, _) => { + walk_ast_expr(db, sub_expr, target, best, best_size); + } + AstExprKind::ParenthesisOp(callee, args) => { + walk_ast_expr(db, callee, target, best, best_size); + for arg in &args.values { + walk_ast_expr(db, arg, target, best, best_size); + } + } + AstExprKind::Tuple(elems) => { + for elem in &elems.values { + walk_ast_expr(db, elem, target, best, best_size); + } + } + AstExprKind::Constructor(_, fields) => { + for field in &fields.values { + walk_ast_expr(db, &field.value, target, best, best_size); + } + } + AstExprKind::Return(opt_expr) => { + if let Some(sub_expr) = opt_expr { + walk_ast_expr(db, sub_expr, target, best, best_size); + } + } + AstExprKind::Await { future, .. } => { + walk_ast_expr(db, future, target, best, best_size); + } + AstExprKind::PermissionOp { value, .. } => { + walk_ast_expr(db, value, target, best, best_size); + } + AstExprKind::BinaryOp(_, lhs, rhs) => { + walk_ast_expr(db, lhs, target, best, best_size); + walk_ast_expr(db, rhs, target, best, best_size); + } + AstExprKind::UnaryOp(_, sub_expr) => { + walk_ast_expr(db, sub_expr, target, best, best_size); + } + AstExprKind::If(arms) => { + for arm in arms { + if let Some(cond) = &arm.condition { + walk_ast_expr(db, cond, target, best, best_size); + } + for stmt in &arm.result.statements(db).values { + walk_ast_statement(db, stmt, target, best, best_size); + } + } + } + } +} + +// ---- AST probe: compact formatter ---- + +/// Format an AstExpr as a compact single-line string. 
+/// +/// # Example outputs +/// - `Literal(String, "hello\nworld")` +/// - `ParenthesisOp(Id(print), [Literal(String, "hello")])` +/// - `Await(ParenthesisOp(Id(print), [Literal(String, "hello")]))` +fn compact_ast_format<'db>(db: &'db dyn crate::Db, expr: &AstExpr<'db>) -> String { + let mut buf = String::new(); + format_ast_expr(db, expr, &mut buf); + buf +} + +fn format_ast_expr<'db>(db: &'db dyn crate::Db, expr: &AstExpr<'db>, buf: &mut String) { + match &*expr.kind { + AstExprKind::Literal(lit) => { + let kind = match lit.kind(db) { + LiteralKind::Boolean => "Boolean", + LiteralKind::Integer => "Integer", + LiteralKind::String => "String", + }; + buf.push_str("Literal("); + buf.push_str(kind); + buf.push_str(", \""); + escape_string_into(lit.text(db), buf); + buf.push_str("\")"); + } + AstExprKind::Id(spanned_id) => { + buf.push_str("Id("); + format_identifier(db, spanned_id, buf); + buf.push(')'); + } + AstExprKind::Block(block) => { + buf.push_str("Block(["); + for (i, stmt) in block.statements(db).values.iter().enumerate() { + if i > 0 { + buf.push_str(", "); + } + format_ast_statement(db, stmt, buf); + } + buf.push_str("])"); + } + AstExprKind::DotId(sub_expr, spanned_id) => { + buf.push_str("DotId("); + format_ast_expr(db, sub_expr, buf); + buf.push_str(", "); + format_identifier(db, spanned_id, buf); + buf.push(')'); + } + AstExprKind::SquareBracketOp(sub_expr, _) => { + buf.push_str("SquareBracketOp("); + format_ast_expr(db, sub_expr, buf); + buf.push(')'); + } + AstExprKind::ParenthesisOp(callee, args) => { + buf.push_str("ParenthesisOp("); + format_ast_expr(db, callee, buf); + buf.push_str(", ["); + for (i, arg) in args.values.iter().enumerate() { + if i > 0 { + buf.push_str(", "); + } + format_ast_expr(db, arg, buf); + } + buf.push_str("])"); + } + AstExprKind::Tuple(elems) => { + buf.push_str("Tuple(["); + for (i, elem) in elems.values.iter().enumerate() { + if i > 0 { + buf.push_str(", "); + } + format_ast_expr(db, elem, buf); + } + 
buf.push_str("])"); + } + AstExprKind::Constructor(path, fields) => { + buf.push_str("Constructor("); + format_ast_path(db, path, buf); + buf.push_str(", ["); + for (i, field) in fields.values.iter().enumerate() { + if i > 0 { + buf.push_str(", "); + } + format_identifier(db, &field.name, buf); + buf.push_str(": "); + format_ast_expr(db, &field.value, buf); + } + buf.push_str("])"); + } + AstExprKind::Return(opt_expr) => { + buf.push_str("Return"); + if let Some(sub_expr) = opt_expr { + buf.push('('); + format_ast_expr(db, sub_expr, buf); + buf.push(')'); + } + } + AstExprKind::Await { future, .. } => { + buf.push_str("Await("); + format_ast_expr(db, future, buf); + buf.push(')'); + } + AstExprKind::PermissionOp { value, op } => { + let op_str = match op { + PermissionOp::Mutate => "Mutate", + PermissionOp::Reference => "Reference", + PermissionOp::Give => "Give", + PermissionOp::Share => "Share", + }; + buf.push_str("PermissionOp("); + buf.push_str(op_str); + buf.push_str(", "); + format_ast_expr(db, value, buf); + buf.push(')'); + } + AstExprKind::BinaryOp(spanned_op, lhs, rhs) => { + buf.push_str("BinaryOp("); + buf.push_str(&spanned_op.op.to_string()); + buf.push_str(", "); + format_ast_expr(db, lhs, buf); + buf.push_str(", "); + format_ast_expr(db, rhs, buf); + buf.push(')'); + } + AstExprKind::UnaryOp(spanned_op, sub_expr) => { + let op_str = match spanned_op.op { + UnaryOp::Not => "!", + UnaryOp::Negate => "-", + }; + buf.push_str("UnaryOp("); + buf.push_str(op_str); + buf.push_str(", "); + format_ast_expr(db, sub_expr, buf); + buf.push(')'); + } + AstExprKind::If(arms) => { + buf.push_str("If(["); + for (i, arm) in arms.iter().enumerate() { + if i > 0 { + buf.push_str(", "); + } + if let Some(cond) = &arm.condition { + format_ast_expr(db, cond, buf); + buf.push_str(" => "); + } else { + buf.push_str("else => "); + } + buf.push_str("Block(["); + for (j, stmt) in arm.result.statements(db).values.iter().enumerate() { + if j > 0 { + buf.push_str(", "); + } + 
format_ast_statement(db, stmt, buf); + } + buf.push_str("])"); + } + buf.push_str("])"); + } + } +} + +fn format_ast_statement<'db>(db: &'db dyn crate::Db, stmt: &AstStatement<'db>, buf: &mut String) { + match stmt { + AstStatement::Let(let_stmt) => { + buf.push_str("Let("); + format_identifier(db, &let_stmt.name(db), buf); + if let Some(init) = let_stmt.initializer(db) { + buf.push_str(", "); + format_ast_expr(db, &init, buf); + } + buf.push(')'); + } + AstStatement::Expr(expr) => { + format_ast_expr(db, expr, buf); + } + } +} + +fn format_identifier(db: &dyn crate::Db, id: &SpannedIdentifier<'_>, buf: &mut String) { + buf.push_str(id.id.text(db)); +} + +fn format_ast_path(db: &dyn crate::Db, path: &dada_ir_ast::ast::AstPath<'_>, buf: &mut String) { + match path.kind(db) { + AstPathKind::Identifier(spanned_id) => { + format_identifier(db, spanned_id, buf); + } + AstPathKind::GenericArgs { path, .. } => { + format_ast_path(db, path, buf); + buf.push_str("[...]"); + } + AstPathKind::Member { path, id } => { + format_ast_path(db, path, buf); + buf.push('.'); + format_identifier(db, id, buf); + } + } +} + +fn escape_string_into(s: &str, buf: &mut String) { + for ch in s.chars() { + match ch { + '\n' => buf.push_str("\\n"), + '\r' => buf.push_str("\\r"), + '\t' => buf.push_str("\\t"), + '\\' => buf.push_str("\\\\"), + '"' => buf.push_str("\\\""), + c if c.is_control() => { + buf.push_str(&format!("\\x{:02X}", c as u32)); + } + c => buf.push(c), + } + } +} diff --git a/components/dada-spec-common/Cargo.toml b/components/dada-spec-common/Cargo.toml new file mode 100644 index 00000000..a72b0ff9 --- /dev/null +++ b/components/dada-spec-common/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "dada-spec-common" +version.workspace = true +edition.workspace = true + +[dependencies] +regex = "1.0" diff --git a/components/dada-spec-common/src/lib.rs b/components/dada-spec-common/src/lib.rs new file mode 100644 index 00000000..996c297b --- /dev/null +++ 
b/components/dada-spec-common/src/lib.rs @@ -0,0 +1,734 @@ +use std::path::Path; + +use regex::Regex; + +/// Converts a file path (relative to the spec book's `src/` directory) into +/// the dot-separated prefix for spec paragraph IDs. +/// +/// # Examples +/// - `syntax/string-literals.md` → `syntax.string-literals` +/// - `syntax/README.md` → `syntax` +/// - `conventions.md` → `conventions` +/// - `README.md` → `` (empty) +pub fn file_path_to_prefix(source_path: &Path) -> String { + let without_ext = source_path.with_extension(""); + + // 💡 If the file is README.md, the prefix comes only from the parent directory. + // This matches the mdbook convention where README.md is the index page for a directory. + let effective_path = if without_ext.file_name().and_then(|f| f.to_str()) == Some("README") { + without_ext + .parent() + .map(|p| p.to_path_buf()) + .unwrap_or_default() + } else { + without_ext + }; + + effective_path + .components() + .filter_map(|c| c.as_os_str().to_str()) + .collect::<Vec<_>>() + .join(".") +} + +/// Converts a heading text into a spec ID segment. +/// +/// Lowercases, replaces spaces/underscores with hyphens, strips non-alphanumeric characters +/// (except hyphens). +pub fn heading_to_segment(heading_text: &str) -> String { + heading_text + .trim() + .to_lowercase() + .replace([' ', '_'], "-") + .chars() + .filter(|c| c.is_alphanumeric() || *c == '-') + .collect() +} + +/// Joins non-empty segments into a dot-separated spec ID. +/// +/// Given a file prefix, the current heading segments, and an optional local name, +/// produces the fully-qualified spec paragraph ID. 
+pub fn resolve_spec_id(file_prefix: &str, heading_segments: &[String], local_name: &str) -> String { + let mut parts: Vec<&str> = Vec::new(); + + if !file_prefix.is_empty() { + parts.push(file_prefix); + } + for seg in heading_segments { + if !seg.is_empty() { + parts.push(seg); + } + } + if !local_name.is_empty() { + parts.push(local_name); + } + + parts.join(".") +} + +/// Determines whether a token in a `:::{spec}` directive line is a tag rather than a local name. +/// +/// A token is a tag if it starts with `rfc` or `!` (e.g. `rfc0001`, `!rfc0001`) or is exactly `unimpl`. +pub fn is_tag(token: &str) -> bool { + token.starts_with("rfc") + || token.starts_with("!rfc") + || token.starts_with('!') + || token == "unimpl" +} + +/// Parses the tokens after `:::{spec}` into an optional local name and a list of tags. +/// +/// If the first token looks like a tag (starts with `rfc`, `!`, or is `unimpl`), +/// all tokens are treated as tags and there is no local name. +/// Otherwise the first token is the local name and the rest are tags. +pub fn parse_spec_tokens(rest: &str) -> (Option<String>, Vec<String>) { + let tokens: Vec<&str> = rest.split_whitespace().collect(); + if tokens.is_empty() { + return (None, vec![]); + } + if is_tag(tokens[0]) { + (None, tokens.iter().map(|s| s.to_string()).collect()) + } else { + ( + Some(tokens[0].to_string()), + tokens[1..].iter().map(|s| s.to_string()).collect(), + ) + } +} + +/// Tracks the current heading context while scanning a markdown file line-by-line. +/// +/// 💡 H1 headings (`#`) are skipped because they correspond to the page title, +/// which is already captured in the file path prefix. Including H1 would double-count. +pub struct HeadingTracker { + /// Stack of (heading level, segment) pairs. + stack: Vec<(usize, String)>, +} + +impl HeadingTracker { + pub fn new() -> Self { + HeadingTracker { stack: Vec::new() } + } + + /// Processes a line of markdown. 
If it's a heading (level >= 2), + /// updates the heading stack by popping entries at the same or deeper level, + /// then pushing this heading. + /// + /// Returns `true` if the line was a heading. + pub fn process_line(&mut self, line: &str) -> bool { + let trimmed = line.trim(); + + // Count leading `#` characters + let hashes = trimmed.chars().take_while(|&c| c == '#').count(); + if hashes == 0 || hashes > 6 { + return false; + } + + // Must be followed by a space + let rest = &trimmed[hashes..]; + if !rest.starts_with(' ') { + return false; + } + + let heading_text = rest.trim(); + if heading_text.is_empty() { + return false; + } + + // Skip H1 — it corresponds to the file/page title, already in file_prefix + if hashes == 1 { + return true; + } + + let segment = heading_to_segment(heading_text); + + // Pop all entries at the same or deeper level + self.stack.retain(|(level, _)| *level < hashes); + self.stack.push((hashes, segment)); + + true + } + + /// Returns the current heading segments (just the segment strings, in order). + pub fn current_segments(&self) -> Vec<String> { + self.stack.iter().map(|(_, s)| s.clone()).collect() + } +} + +impl Default for HeadingTracker { + fn default() -> Self { + Self::new() + } +} + +/// Renders a list of spec tags (e.g., `rfc0001`, `unimpl`, `!rfc0001`) as HTML badge spans. +/// +/// Returns an empty string if tags is empty, otherwise returns a space-prefixed +/// string of badge spans (matching the block-level directive badge format). +pub fn render_tag_badges(tags: &[String]) -> String { + if tags.is_empty() { + return String::new(); + } + + let badges: Vec<String> = tags + .iter() + .map(|tag| { + if tag.starts_with('!') { + format!("{tag}") + } else if tag == "unimpl" { + format!("{tag}") + } else { + format!("{tag}") + } + }) + .collect(); + format!(" {}", badges.join(" ")) +} + +/// A parsed inline sub-paragraph marker found within a spec directive's content. 
+pub struct InlineSubParagraph { + pub name: String, + pub tags: Vec<String>, + /// The index of the line within the directive content where this marker appears. + pub line_index: usize, +} + +/// Extracts inline `` {spec}`name [tags...]` `` markers from the content lines of a spec directive block. +/// +/// The first whitespace-separated token of the backtick content is always the name, +/// even if it looks like a tag; the remaining tokens are tags (rfc, unimpl, etc.). +pub fn extract_inline_sub_paragraphs(content_lines: &[String]) -> Vec<InlineSubParagraph> { + let re = Regex::new(r"\{spec\}`([^`]+)`").unwrap(); + let mut results = Vec::new(); + + for (i, line) in content_lines.iter().enumerate() { + for cap in re.captures_iter(line) { + if let Some(content) = cap.get(1) { + // 💡 For inline sub-paragraphs the first token is always the name, + // even if it looks like a tag — unlike block directives where a + // leading tag means "no local name". Inline markers must have a name. + let mut tokens = content.as_str().split_whitespace(); + let name = tokens.next().unwrap_or(content.as_str()).to_string(); + results.push(InlineSubParagraph { + name, + tags: tokens.map(str::to_string).collect(), + line_index: i, + }); + } + } + } + + results +} + +/// Replaces inline `` {spec}`name [tags...]` `` markers in content lines with HTML anchor spans +/// and tag badges. +/// +/// Each marker becomes: +/// `.name [badges]` +pub fn transform_inline_sub_paragraphs(content_lines: &[String], parent_id: &str) -> Vec<String> { + let re = Regex::new(r"\{spec\}`([^`]+)`").unwrap(); + + content_lines + .iter() + .map(|line| { + re.replace_all(line, |caps: &regex::Captures| { + let content = &caps[1]; + let mut tokens = content.split_whitespace(); + let name = tokens.next().unwrap_or(content); + let full_id = format!("{parent_id}.{name}"); + let badges = render_tag_badges(&tokens.map(str::to_string).collect::<Vec<_>>()); + format!( + "\ + .{name}{badges}" + ) + }) + .into_owned() + }) + .collect() +} + +/// Converts a kebab-case name to PascalCase. 
+/// +/// # Examples +/// - `function` → `Function` +/// - `use-declaration` → `UseDeclaration` +/// - `class-member` → `ClassMember` +pub fn kebab_to_pascal_case(name: &str) -> String { + name.split('-') + .map(|word| { + let mut chars = word.chars(); + match chars.next() { + None => String::new(), + Some(first) => { + let mut s = first.to_uppercase().to_string(); + s.extend(chars); + s + } + } + }) + .collect() +} + +/// Converts a spec sub-paragraph name to its EBNF grammar symbol. +/// +/// 💡 Names ending in `-nt` are nonterminals: strip the suffix and PascalCase the rest. +/// All other names are terminals: wrap in backticks. +/// +/// # Examples +/// - `function-nt` → `Function` (nonterminal) +/// - `use-declaration-nt` → `UseDeclaration` (nonterminal) +/// - `as` → `` `as` `` (terminal) +/// - `pub` → `` `pub` `` (terminal) +pub fn spec_name_to_grammar_symbol(name: &str) -> String { + if let Some(stem) = name.strip_suffix("-nt") { + kebab_to_pascal_case(stem) + } else { + format!("`{name}`") + } +} + +/// Expands `...` placeholders in EBNF code blocks using the sub-paragraph names +/// from the same spec directive. +/// +/// When a ` ```ebnf ``` ` block contains `...`, this function: +/// 1. Replaces `...` with plain-text alternatives (one per line, aligned) +/// 2. Removes the `{spec}` sub-bullet lines (the EBNF already shows the alternatives) +/// 3. Preserves any suffix after `...` (like `| ε`) +/// +/// The output keeps markdown ` ```ebnf ``` ` fences — HTML rendering with links +/// is handled separately by the preprocessor. +pub fn expand_ebnf_in_directive(content_lines: &[String]) -> Vec { + let sub_paragraphs = extract_inline_sub_paragraphs(content_lines); + if sub_paragraphs.is_empty() { + return content_lines.to_vec(); + } + + // 💡 First pass: check if any ebnf block contains `...`. + // If not, return unchanged — no expansion needed. 
+ let has_expandable = { + let mut in_ebnf = false; + let mut found = false; + for line in content_lines { + let trimmed = line.trim(); + if trimmed == "```ebnf" { + in_ebnf = true; + } else if trimmed == "```" && in_ebnf { + in_ebnf = false; + } else if in_ebnf && line.contains("...") { + found = true; + break; + } + } + found + }; + + if !has_expandable { + return content_lines.to_vec(); + } + + // Collect the lines that are sub-paragraph bullets (to remove them) + let bullet_lines: std::collections::HashSet = + sub_paragraphs.iter().map(|sp| sp.line_index).collect(); + + let alternatives: Vec = sub_paragraphs + .iter() + .map(|sp| spec_name_to_grammar_symbol(&sp.name)) + .collect(); + + let mut in_ebnf = false; + let mut result = Vec::new(); + + for (i, line) in content_lines.iter().enumerate() { + let trimmed = line.trim(); + + if trimmed == "```ebnf" { + in_ebnf = true; + result.push(line.clone()); + } else if trimmed == "```" && in_ebnf { + in_ebnf = false; + result.push(line.clone()); + } else if in_ebnf { + if let Some(dots_pos) = line.find("...") { + let prefix = &line[..dots_pos]; + let suffix = line[dots_pos + 3..].trim(); + + // 💡 Compute alignment padding for continuation lines. 
+ let align_width = prefix.len(); + let padding = " ".repeat(align_width); + + for (j, alt) in alternatives.iter().enumerate() { + if j == 0 { + result.push(format!("{prefix}{alt}")); + } else { + result.push(format!("{padding}| {alt}")); + } + } + // Append suffix (e.g., `| ε`) as a final alternative + if !suffix.is_empty() { + let suffix = suffix.strip_prefix("| ").unwrap_or(suffix); + result.push(format!("{padding}| {suffix}")); + } + } else { + result.push(line.clone()); + } + } else if bullet_lines.contains(&i) { + // Skip sub-bullet lines — the expanded EBNF replaces them + } else { + result.push(line.clone()); + } + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::Path; + + #[test] + fn test_file_path_to_prefix() { + assert_eq!( + file_path_to_prefix(Path::new("syntax/string-literals.md")), + "syntax.string-literals" + ); + assert_eq!(file_path_to_prefix(Path::new("syntax/README.md")), "syntax"); + assert_eq!( + file_path_to_prefix(Path::new("conventions.md")), + "conventions" + ); + assert_eq!(file_path_to_prefix(Path::new("README.md")), ""); + } + + #[test] + fn test_heading_to_segment() { + assert_eq!(heading_to_segment("Delimiters"), "delimiters"); + assert_eq!(heading_to_segment("Escape Sequences"), "escape-sequences"); + assert_eq!(heading_to_segment("Type"), "type"); + assert_eq!( + heading_to_segment(" Multiline Strings "), + "multiline-strings" + ); + } + + #[test] + fn test_resolve_spec_id() { + assert_eq!( + resolve_spec_id("syntax.string-literals", &["delimiters".into()], "quoted"), + "syntax.string-literals.delimiters.quoted" + ); + // No local name — ID is just context + assert_eq!( + resolve_spec_id("syntax.string-literals", &["delimiters".into()], ""), + "syntax.string-literals.delimiters" + ); + // Empty prefix (root file) + assert_eq!( + resolve_spec_id("", &["paragraph-references".into()], ""), + "paragraph-references" + ); + // Nested headings + assert_eq!( + resolve_spec_id( + "syntax.string-literals", + 
&["interpolation".into(), "advanced".into()], + "nesting" + ), + "syntax.string-literals.interpolation.advanced.nesting" + ); + } + + #[test] + fn test_is_tag() { + assert!(is_tag("rfc0001")); + assert!(is_tag("!rfc0001")); + assert!(is_tag("unimpl")); + assert!(!is_tag("quoted")); + assert!(!is_tag("delimiters")); + assert!(!is_tag("escape-sequences")); + } + + #[test] + fn test_parse_spec_tokens() { + // Local name + tags + let (name, tags) = parse_spec_tokens("quoted rfc0001 unimpl"); + assert_eq!(name, Some("quoted".to_string())); + assert_eq!(tags, vec!["rfc0001", "unimpl"]); + + // No local name — all tags + let (name, tags) = parse_spec_tokens("rfc0001 unimpl"); + assert_eq!(name, None); + assert_eq!(tags, vec!["rfc0001", "unimpl"]); + + // Empty + let (name, tags) = parse_spec_tokens(""); + assert_eq!(name, None); + assert!(tags.is_empty()); + + // Local name only, no tags + let (name, tags) = parse_spec_tokens("quoted"); + assert_eq!(name, Some("quoted".to_string())); + assert!(tags.is_empty()); + } + + #[test] + fn test_heading_tracker_basic() { + let mut tracker = HeadingTracker::new(); + + // H1 is skipped + assert!(tracker.process_line("# String Literals")); + assert!(tracker.current_segments().is_empty()); + + // H2 adds a segment + tracker.process_line("## Delimiters"); + assert_eq!(tracker.current_segments(), vec!["delimiters"]); + + // Same-level H2 replaces + tracker.process_line("## Type"); + assert_eq!(tracker.current_segments(), vec!["type"]); + + // H2 then H3 nests + tracker.process_line("## Escape Sequences"); + tracker.process_line("### Special Cases"); + assert_eq!( + tracker.current_segments(), + vec!["escape-sequences", "special-cases"] + ); + + // New H2 pops the H3 + tracker.process_line("## Interpolation"); + assert_eq!(tracker.current_segments(), vec!["interpolation"]); + } + + #[test] + fn test_heading_tracker_not_headings() { + let mut tracker = HeadingTracker::new(); + + // Not a heading — no space after # + 
assert!(!tracker.process_line("#NotAHeading")); + assert!(tracker.current_segments().is_empty()); + + // Regular text + assert!(!tracker.process_line("Some regular text")); + + // Empty line + assert!(!tracker.process_line("")); + + // Code with hashes + assert!(!tracker.process_line("####### Too many hashes")); + } + + #[test] + fn test_render_tag_badges() { + assert_eq!(render_tag_badges(&[]), ""); + + let result = render_tag_badges(&["rfc0001".to_string()]); + assert!(result.contains("class=\"spec-rfc-badge\"")); + assert!(result.contains("rfc0001")); + + let result = render_tag_badges(&["unimpl".to_string()]); + assert!(result.contains("spec-rfc-unimpl")); + + let result = render_tag_badges(&["!rfc0001".to_string()]); + assert!(result.contains("spec-rfc-deleted")); + + let result = render_tag_badges(&["rfc0001".to_string(), "unimpl".to_string()]); + assert!(result.contains("spec-rfc-badge\">rfc0001")); + assert!(result.contains("spec-rfc-unimpl\">unimpl")); + } + + #[test] + fn test_extract_inline_sub_paragraphs() { + let lines = vec![ + "There are multiple forms:".to_string(), + "".to_string(), + "* {spec}`quoted` Single quote...".to_string(), + "* {spec}`triple-quoted` Triple quote...".to_string(), + "Some other text".to_string(), + ]; + let subs = extract_inline_sub_paragraphs(&lines); + assert_eq!(subs.len(), 2); + assert_eq!(subs[0].name, "quoted"); + assert!(subs[0].tags.is_empty()); + assert_eq!(subs[0].line_index, 2); + assert_eq!(subs[1].name, "triple-quoted"); + assert!(subs[1].tags.is_empty()); + assert_eq!(subs[1].line_index, 3); + } + + #[test] + fn test_extract_inline_sub_paragraphs_with_tags() { + let lines = vec![ + "* {spec}`quoted` Implemented.".to_string(), + "* {spec}`triple-quoted unimpl` Not yet.".to_string(), + "* {spec}`raw rfc0002 unimpl` Future RFC.".to_string(), + ]; + let subs = extract_inline_sub_paragraphs(&lines); + assert_eq!(subs.len(), 3); + + assert_eq!(subs[0].name, "quoted"); + assert!(subs[0].tags.is_empty()); + + 
assert_eq!(subs[1].name, "triple-quoted"); + assert_eq!(subs[1].tags, vec!["unimpl"]); + + assert_eq!(subs[2].name, "raw"); + assert_eq!(subs[2].tags, vec!["rfc0002", "unimpl"]); + } + + #[test] + fn test_transform_inline_sub_paragraphs() { + let lines = vec![ + "There are multiple forms:".to_string(), + "* {spec}`quoted` Single quote literals.".to_string(), + ]; + let result = transform_inline_sub_paragraphs(&lines, "syntax.string-literals.delimiters"); + + assert_eq!(result[0], "There are multiple forms:"); + assert!(result[1].contains("id=\"syntax.string-literals.delimiters.quoted\"")); + assert!(result[1].contains(">.quoted")); + assert!(result[1].contains("Single quote literals.")); + } + + #[test] + fn test_transform_inline_sub_paragraphs_with_tags() { + let lines = vec![ + "* {spec}`quoted` Implemented.".to_string(), + "* {spec}`triple-quoted unimpl` Not yet.".to_string(), + ]; + let result = transform_inline_sub_paragraphs(&lines, "s.delimiters"); + + // quoted: no badges + assert!(result[0].contains("id=\"s.delimiters.quoted\"")); + assert!(!result[0].contains("spec-rfc-badge")); + + // triple-quoted: unimpl badge, and ID uses only the name + assert!(result[1].contains("id=\"s.delimiters.triple-quoted\"")); + assert!(result[1].contains(">.triple-quoted")); + assert!(result[1].contains("spec-rfc-unimpl")); + assert!(result[1].contains("Not yet.")); + } + + #[test] + fn test_kebab_to_pascal_case() { + assert_eq!(kebab_to_pascal_case("function"), "Function"); + assert_eq!(kebab_to_pascal_case("use-declaration"), "UseDeclaration"); + assert_eq!(kebab_to_pascal_case("class-member"), "ClassMember"); + assert_eq!(kebab_to_pascal_case("a"), "A"); + assert_eq!( + kebab_to_pascal_case("generic-parameter"), + "GenericParameter" + ); + } + + #[test] + fn test_spec_name_to_grammar_symbol() { + // Nonterminals: -nt suffix → PascalCase + assert_eq!(spec_name_to_grammar_symbol("function-nt"), "Function"); + assert_eq!( + spec_name_to_grammar_symbol("use-declaration-nt"), 
+ "UseDeclaration" + ); + assert_eq!(spec_name_to_grammar_symbol("class-nt"), "Class"); + + // Terminals: no suffix → backtick-wrapped + assert_eq!(spec_name_to_grammar_symbol("as"), "`as`"); + assert_eq!(spec_name_to_grammar_symbol("pub"), "`pub`"); + assert_eq!(spec_name_to_grammar_symbol("async"), "`async`"); + } + + #[test] + fn test_expand_ebnf_nonterminals() { + let lines = vec![ + "An item `Item` is one of the following:".to_string(), + "".to_string(), + "```ebnf".to_string(), + "Item ::= ...".to_string(), + "```".to_string(), + "".to_string(), + "* {spec}`function-nt` A function `Function`.".to_string(), + "* {spec}`class-nt` A class `Class`.".to_string(), + "* {spec}`struct-nt` A struct `Struct`.".to_string(), + ]; + let result = expand_ebnf_in_directive(&lines); + // Plain text, one alternative per line + assert!(result.iter().any(|l| l.contains("Item ::= Function"))); + assert!(result.iter().any(|l| l.contains("| Class"))); + assert!(result.iter().any(|l| l.contains("| Struct"))); + // Bullet lines removed + assert!(!result.iter().any(|l| l.contains("{spec}"))); + // Prose preserved + assert!(result.iter().any(|l| l.contains("An item"))); + // Still has markdown fences + assert!(result.iter().any(|l| l.contains("```ebnf"))); + } + + #[test] + fn test_expand_ebnf_terminals() { + let lines = vec![ + "Keywords:".to_string(), + "```ebnf".to_string(), + "Keyword ::= ...".to_string(), + "```".to_string(), + "* {spec}`as` `as`".to_string(), + "* {spec}`async` `async`".to_string(), + "* {spec}`await` `await`".to_string(), + ]; + let result = expand_ebnf_in_directive(&lines); + // Terminals wrapped in backticks (plain text) + assert!(result.iter().any(|l| l.contains("Keyword ::= `as`"))); + assert!(result.iter().any(|l| l.contains("| `async`"))); + assert!(result.iter().any(|l| l.contains("| `await`"))); + // Bullet lines removed + assert!(!result.iter().any(|l| l.contains("{spec}"))); + } + + #[test] + fn test_expand_ebnf_with_suffix() { + let lines = vec![ + 
"Visibility:".to_string(), + "```ebnf".to_string(), + "Visibility ::= ... | ε".to_string(), + "```".to_string(), + "* {spec}`pub` `pub`.".to_string(), + "* {spec}`export` `export`.".to_string(), + ]; + let result = expand_ebnf_in_directive(&lines); + assert!(result.iter().any(|l| l.contains("Visibility ::= `pub`"))); + assert!(result.iter().any(|l| l.contains("| `export`"))); + assert!(result.iter().any(|l| l.contains("| ε"))); + assert!(!result.iter().any(|l| l.contains("{spec}"))); + } + + #[test] + fn test_expand_ebnf_no_dots() { + // EBNF without `...` should pass through unchanged, even with sub-paragraphs + let lines = vec![ + "```ebnf".to_string(), + "ClassMember ::= Field | Function".to_string(), + "```".to_string(), + "* {spec}`field-member` A field.".to_string(), + ]; + let result = expand_ebnf_in_directive(&lines); + assert_eq!(result[0], "```ebnf"); + assert_eq!(result[1], "ClassMember ::= Field | Function"); + assert_eq!(result[2], "```"); + assert_eq!(result[3], "* {spec}`field-member` A field."); + } + + #[test] + fn test_expand_ebnf_no_sub_paragraphs() { + // `...` but no sub-paragraphs — no expansion + let lines = vec![ + "```ebnf".to_string(), + "Item ::= ...".to_string(), + "```".to_string(), + ]; + let result = expand_ebnf_in_directive(&lines); + assert_eq!(result[0], "```ebnf"); + assert_eq!(result[1], "Item ::= ..."); + assert_eq!(result[2], "```"); + } +} diff --git a/components/xtask/src/rfc.rs b/components/xtask/src/rfc.rs index 3dd62110..68c37234 100644 --- a/components/xtask/src/rfc.rs +++ b/components/xtask/src/rfc.rs @@ -63,10 +63,10 @@ impl Rfc { let dir_str = dir_name.to_string_lossy(); // Extract number from directory names like "0001-feature-name" - if let Some(number_str) = dir_str.split('-').next() { - if let Ok(number) = number_str.parse::() { - max_number = max_number.max(number); - } + if let Some(number_str) = dir_str.split('-').next() + && let Ok(number) = number_str.parse::() + { + max_number = max_number.max(number); } } } 
diff --git a/rfcs/src/0001-string-literals/README.md b/rfcs/src/0001-string-literals/README.md index fea53e85..6f591d93 100644 --- a/rfcs/src/0001-string-literals/README.md +++ b/rfcs/src/0001-string-literals/README.md @@ -62,9 +62,9 @@ result := "The sum is {calculate_sum(a, b)}" status := "Processing {completed}/{total} items ({(completed * 100 / total).round()}%)" ``` -For cases where literal braces are needed, they can be escaped by doubling: +For cases where literal braces are needed, they can be escaped with a backslash: ```dada -json := "{{ \"name\": \"{name}\" }}" # Produces: { "name": "Alice" } +json := "\{ \"name\": \"{name}\" \}" # Produces: { "name": "Alice" } ``` ### Triple-quoted strings @@ -139,17 +139,11 @@ raw_message := "\ assert raw_message == "\n Hello, Alice!\n Welcome to Dada.\n This is preserved exactly as written.\n" ``` -To include a trailing newline in the dedented string, end with `\n`: +Leading and trailing whitespace is stripped; only internal content is preserved. 
+Escape sequences like `\n` are part of the content, not whitespace, +so they survive stripping: ```dada -# With trailing newline -with_newline := " - Line 1 - Line 2 - Line 3\n -" -assert with_newline == "Line 1\nLine 2\nLine 3\n" - # Without trailing newline (default) without_newline := " Line 1 @@ -157,6 +151,14 @@ without_newline := " Line 3 " assert without_newline == "Line 1\nLine 2\nLine 3" + +# With trailing newline via escape sequence +with_newline := " + Line 1 + Line 2 + Line 3\n +" +assert with_newline == "Line 1\nLine 2\nLine 3\n" ``` Interpolation works seamlessly with multiline strings: @@ -215,8 +217,8 @@ nested := "Result: {if true { "yes" } else { "no" }}" # Results in: Result: yes ### Escape Sequences -- `{{` produces a literal `{` -- `}}` produces a literal `}` +- `\{` produces a literal `{` +- `\}` produces a literal `}` - `\"` produces a literal quote (not needed in triple-quoted strings) - `\n`, `\r`, `\t`, `\\` follow standard conventions - Triple-quoted strings cannot contain three consecutive quote characters @@ -236,12 +238,19 @@ A: Experience from Rust and other languages shows that building strings with dyn **Q: What about purely static strings with no interpolation?** A: The compiler can easily detect string literals that contain no interpolation expressions and optimize them accordingly. -**Q: How does this interact with raw strings or multi-line strings?** -A: This RFC focuses on basic string literals. Raw strings and multi-line strings will be addressed in future RFCs, but they will follow the same principle of supporting interpolation by default. - **Q: Why `{}` instead of `${}` like JavaScript?** A: The simpler `{}` syntax is more consistent with Rust's format strings and requires less visual noise. Since interpolation is the default, the syntax should be as lightweight as possible. +**Q: Why `\{` instead of `{{` to escape braces?** +A: Two reasons. 
First, Dada string literals already use backslash escapes (`\n`, `\t`, `\\`, `\"`), so `\{` is consistent with the existing escape system — it would be odd to have two different escaping mechanisms in the same literal. Second, keeping `{{` free means it works as an interpolated block expression, which is useful for embedding multiline code: +```dada +result := "the value is {{ + x := foo() + bar(x) +}}" +``` +Languages like Rust and Python use `{{` for brace escaping because their interpolation lives in format macros or f-strings where backslash escapes aren't available. Dada strings have backslash escapes natively, so there's no reason not to use them. + ## Future possibilities - **Raw string literals** - A syntax to disable escape sequence processing (e.g., `r"C:\path\to\file"` would not interpret `\p`, `\t`, `\f` as escape sequences) diff --git a/rfcs/src/0001-string-literals/impl.md b/rfcs/src/0001-string-literals/impl.md index f55ad457..89a2d09b 100644 --- a/rfcs/src/0001-string-literals/impl.md +++ b/rfcs/src/0001-string-literals/impl.md @@ -3,10 +3,36 @@ *This file tracks implementation progress for RFC-0001: String Literals* ## Status -Not started +In progress -## Implementation Plan -*To be filled in when implementation begins* +## Completed +- [x] Escape sequence processing (`\n`, `\t`, `\\`, `\"`, `\{`, `\}`, `\r`) +- [x] Triple-quoted strings (disambiguation, termination, embedded quotes) +- [x] String type (`my String`) +- [x] Invalid escape sequence errors +- [x] Brace escaping (`\{`, `\}`) +- [x] Multiline strings: leading newline removal, trailing whitespace removal, auto-dedenting +- [x] Escape sequences treated as content during dedenting +- [x] Raw strings (`"\` prefix disables dedenting) +- [x] Ast probe infrastructure for tokenizer-level TDD -## Progress Notes -*To be updated as implementation proceeds* \ No newline at end of file +## Remaining +- [ ] String interpolation: curly brace expressions inside strings +- [ ] Lexer brace nesting depth 
tracking +- [ ] Nested quotes inside interpolated expressions +- [ ] Interpolation scope — evaluated in enclosing scope +- [ ] Interpolation evaluation order — left-to-right +- [ ] Type checking for interpolated expressions +- [ ] Permission system for interpolated expressions +- [ ] String conversion mechanism — blocked on trait/interface RFC + +## Spec Paragraphs +14/22 spec paragraphs implemented in `spec/src/syntax/string-literals.md`. +8 remaining: 7 interpolation + 1 string conversion. + +## Notes +- Spec paragraphs authored directly in the spec (not in `rfcs/src/0001-string-literals/spec.md`), + validating the RFC-0002 workflow +- Ast probe (`#? Ast:`) enables TDD for tokenizer-level features +- `process_escape_sequences()` standalone function duplicates logic from `Tokenizer::escape_sequence()` — + if escape rules change, both must be updated diff --git a/rfcs/src/0001-string-literals/spec.md b/rfcs/src/0001-string-literals/spec.md deleted file mode 100644 index f52bb872..00000000 --- a/rfcs/src/0001-string-literals/spec.md +++ /dev/null @@ -1,42 +0,0 @@ -# Specification draft - -*This file contains draft specification text for RFC-0001: String Literals* - -## String Literals - -r[syntax.string-literals.type] -String literals have type `my String`. - -r[syntax.string-literals.double-quoted] -String literals can be delimited by double quotes, beginning with `"` and ending with `"`. Note that `"""` is interpreted as the start of a triple-quoted string and not an empty string followed by a `"` character. - -r[syntax.string-literals.triple-quoted] -String literals can be delimited by triple quotes, beginning with `"""` and ending with `"""`. Embedded double quotes do not need escaping. - -r[syntax.string-literals.interpolation] -String literals may contain interpolation expressions within curly braces (`{expression}`). 
- -r[syntax.string-literals.lexical-analysis] -The lexer recognizes string literals with interpolation and treats characters inside `{}` as part of the interpolated expression, not the string literal. Quotes inside interpolated expressions do not terminate the string literal. - -r[syntax.string-literals.escaping] -Literal braces are escaped by doubling: `{{` produces `{` and `}}` produces `}`. - -r[syntax.string-literals.evaluation] -Interpolated expressions are evaluated at runtime in the current scope, converted to strings, and evaluated left-to-right. - -r[syntax.string-literals.multiline] -A string literal that begins with a newline immediately after the opening quote is a multiline string literal with automatic indentation handling. - -r[syntax.string-literals.multiline.dedenting] -For multiline string literals where each line is either empty or has a consistent whitespace prefix: -- Leading and trailing whitespace is trimmed -- The common whitespace prefix is removed from the start of each line - -r[syntax.string-literals.multiline.raw] -A multiline string literal that begins with `"\` followed by a newline preserves the string exactly as written, including the leading newline and all indentation. - -r[syntax.string-literals.multiline.trailing-newline] -A multiline string literal ending with `\n` before the closing quote includes a trailing newline in the final string value. 
- -*More detailed specification text to be developed during implementation* \ No newline at end of file diff --git a/rfcs/src/0001-string-literals/todo.md b/rfcs/src/0001-string-literals/todo.md index 8ccb2836..e19087cc 100644 --- a/rfcs/src/0001-string-literals/todo.md +++ b/rfcs/src/0001-string-literals/todo.md @@ -24,11 +24,11 @@ RFC drafted with multiline string support, ready for implementation planning - Common whitespace prefix removal - `"\` syntax to disable dedenting - `\n` before closing quote for trailing newline -- Updated spec.md with multiline string specification entries +- Spec paragraphs are authored directly in `spec/src/syntax/string-literals.md` (not in a separate RFC spec.md) - Created executable examples using `assert` syntax - Added design tenets section with three core principles: - Do what I mean - Rust-like syntax - Simple escape hatch - Added triple-quoted string literals (`"""`) for embedded quotes -- Restructured spec.md with cleaner rule separation \ No newline at end of file +- Restructured spec paragraphs with cleaner rule separation \ No newline at end of file diff --git a/spec/src/SUMMARY.md b/spec/src/SUMMARY.md index 140d7f57..573012ac 100644 --- a/spec/src/SUMMARY.md +++ b/spec/src/SUMMARY.md @@ -6,6 +6,11 @@ # Language - [Syntax](syntax/README.md) + - [Lexical Structure](syntax/lexical-structure.md) + - [Items](syntax/items.md) + - [Statements](syntax/statements.md) + - [Expressions](syntax/expressions.md) + - [Types and Permissions](syntax/types-and-permissions.md) - [Literals](syntax/literals.md) - [String Literals](syntax/string-literals.md) diff --git a/spec/src/conventions.md b/spec/src/conventions.md index 3f42a577..6f5dad14 100644 --- a/spec/src/conventions.md +++ b/spec/src/conventions.md @@ -7,40 +7,103 @@ This chapter describes the conventions used throughout this specification. 
Specification paragraphs use MyST directive syntax with the `{spec}` directive: ```markdown -:::{spec} topic.subtopic.detail +:::{spec} local-name rfc123 Paragraph content. ::: ``` -These labels serve multiple purposes: -- Cross-referencing within the specification -- Linking from RFC documents -- Test annotations via `#:spec topic.subtopic.detail` +### ID Resolution + +Paragraph IDs are resolved automatically from context: + +1. **File path**: `syntax/string-literals.md` contributes prefix `syntax.string-literals` +2. **Section headings**: `## Escape Sequences` contributes segment `escape-sequences` +3. **Local name**: The name in the `:::{spec}` directive (e.g., `invalid`) + +These combine to form the full ID: `syntax.string-literals.escape-sequences.invalid` + +The local name is optional. A directive with only tags uses the heading context as its ID: + +```markdown +## Type + +:::{spec} rfc0001 unimpl +String literals have type `my String`. +::: +``` -Identifiers use semantic names rather than numbers to remain stable as the specification evolves. Examples include: -- `syntax.string-literals.escape-sequences` -- `permissions.lease.transfer-rules` -- `types.classes.field-access` +This paragraph's ID is `syntax.string-literals.type` (file prefix + heading). -### RFC Annotations +### Inline Sub-paragraphs -Paragraphs modified by an RFC include RFC tags after the paragraph ID: +List items within a `:::{spec}` block can be marked as individually referenceable +sub-paragraphs using the `` {spec}`name` `` syntax: ```markdown -:::{spec} syntax.foo rfc123 -Content added or modified by RFC 123. +:::{spec} rfc0001 unimpl +There are multiple forms of string literals: + +* {spec}`quoted` Single-quoted string literals begin with `"` and end with `"`. +* {spec}`triple-quoted` Triple-quoted string literals begin with `"""` and end with `"""`. 
::: ``` -Content deleted by an RFC uses the `!` prefix: +Under `## Delimiters` in `syntax/string-literals.md`, this creates: +- `syntax.string-literals.delimiters` (parent paragraph) +- `syntax.string-literals.delimiters.quoted` (sub-paragraph) +- `syntax.string-literals.delimiters.triple-quoted` (sub-paragraph) + +Each sub-paragraph gets its own linkable anchor in the rendered output. + +### RFC and Status Annotations + +Paragraphs include tags after the optional local name: ```markdown -:::{spec} syntax.old-feature !rfc123 -This feature is removed. +:::{spec} local-name rfc123 unimpl +Content added by RFC 123, not yet implemented. ::: ``` -Multiple RFCs can be specified: `:::{spec} topic.foo rfc123 rfc456` +Available tags: +- `rfcN` — content added or modified by RFC N +- `!rfcN` — content deleted by RFC N +- `unimpl` — specified but not yet implemented + +Multiple tags can be combined: `:::{spec} local-name rfc123 rfc456 unimpl` + +### Test Annotations + +Tests reference spec paragraphs using `#:spec` comments with the fully-qualified ID: + +```dada +#:spec syntax.string-literals.delimiters.quoted +``` + +Paragraph IDs serve multiple purposes: +- Cross-referencing within the specification +- Linking from RFC documents +- Test validation via `#:spec` annotations in `.dada` test files + +Identifiers use semantic names rather than numbers to remain stable as the specification evolves. + +## EBNF Notation + +This specification uses Extended Backus-Naur Form (EBNF) to describe syntax. 
+Standard EBNF operators apply: + +- `A*` — zero or more repetitions of A +- `A+` — one or more repetitions of A +- `A?` — optional A +- `A | B` — A or B +- `` `keyword` `` — a literal terminal +- `ε` — the empty production + +In addition, this specification uses the following shorthand +for comma-separated lists with optional trailing commas: + +- `A,*` — zero or more comma-separated occurrences of A +- `A,+` — one or more comma-separated occurrences of A ## Normative Language @@ -49,4 +112,4 @@ This specification uses the following terms to indicate requirements: - **must not**: An absolute prohibition - **should**: A strong recommendation - **should not**: A strong recommendation against -- **may**: An optional feature or behavior \ No newline at end of file +- **may**: An optional feature or behavior diff --git a/spec/src/syntax/expressions.md b/spec/src/syntax/expressions.md new file mode 100644 index 00000000..5d13ef5c --- /dev/null +++ b/spec/src/syntax/expressions.md @@ -0,0 +1,249 @@ +# Expressions + +This chapter specifies the expression syntax of Dada. + +## `Expr` definition + +:::{spec} +An expression `Expr` is parsed using precedence climbing. +From lowest to highest precedence: + +```ebnf +Expr ::= ... +``` + +* {spec}`assign-expr-nt` An assignment expression `AssignExpr`. +::: + +## `AssignExpr` definition + +:::{spec} +The assignment operator `=` assigns a value to a place expression. +It has the lowest precedence among binary operators: + +```ebnf +AssignExpr ::= ... +``` + +* {spec}`or-expr-nt` A logical OR expression `OrExpr` (`=` `OrExpr`)? +::: + +## `OrExpr` definition + +:::{spec} +The logical OR operator `||` performs short-circuit boolean logic: + +```ebnf +OrExpr ::= ... +``` + +* {spec}`and-expr-nt` An AND expression `AndExpr` (`||` `AndExpr`)* +::: + +## `AndExpr` definition + +:::{spec} +The logical AND operator `&&` performs short-circuit boolean logic: + +```ebnf +AndExpr ::= ... 
+``` + +* {spec}`compare-expr-nt` A comparison expression `CompareExpr` (`&&` `CompareExpr`)* +::: + +## `CompareExpr` definition + +:::{spec} +The comparison operators compare two values and produce a boolean result: + +```ebnf +CompareExpr ::= ... +``` + +* {spec}`add-expr-nt` An additive expression `AddExpr` (`CompareOp` `AddExpr`)* + +```ebnf +CompareOp ::= `==` | `<` | `>` | `<=` | `>=` +``` +::: + +## `AddExpr` definition + +:::{spec} +The additive operators perform addition and subtraction: + +```ebnf +AddExpr ::= ... +``` + +* {spec}`mul-expr-nt` A multiplicative expression `MulExpr` ((`+` | `-`) `MulExpr`)* +::: + +## `MulExpr` definition + +:::{spec} +The multiplicative operators perform multiplication and division: + +```ebnf +MulExpr ::= ... +``` + +* {spec}`unary-expr-nt` A unary expression `UnaryExpr` ((`*` | `/`) `UnaryExpr`)* +::: + +## `UnaryExpr` definition + +:::{spec} +A unary expression applies a prefix operator to a postfix expression: + +```ebnf +UnaryExpr ::= UnaryOp* PostfixExpr +UnaryOp ::= `!` | `-` +``` + +* {spec}`not` `!` performs logical negation. +* {spec}`negate` `-` performs arithmetic negation. +::: + +## Newline Sensitivity + +:::{spec} +A binary operator must appear on the same line as its left operand. +An operator on a new line begins a new expression or is interpreted as a prefix operator. +::: + +## `PostfixExpr` definition + +:::{spec} +A postfix expression applies zero or more postfix operators +to a primary expression: + +```ebnf +PostfixExpr ::= PrimaryExpr PostfixOp* +``` +::: + +### `PostfixOp` definition + +:::{spec} +A postfix operator `PostfixOp` is one of the following: + +```ebnf +PostfixOp ::= ... +``` + +* {spec}`field-access-nt` A field access `FieldAccess`. +* {spec}`call-nt` A function or method call `Call`. +* {spec}`await-nt` An await expression `Await`. +* {spec}`permission-op-nt` A permission operation `PermissionOp`. 
+::: + +### `FieldAccess` definition + +:::{spec} +A field access `FieldAccess` uses dot notation to access a field or name a method: + +```ebnf +FieldAccess ::= `.` Identifier +``` +::: + +### `Call` definition + +:::{spec} +A function or method call `Call` follows an expression with parenthesized arguments +separated by commas. +The opening parenthesis must appear on the same line as the callee: + +```ebnf +Call ::= `(` Expr,* `)` +``` +::: + +### `Await` definition + +:::{spec} +The `.await` postfix operator awaits the result of a future: + +```ebnf +Await ::= `.` `await` +``` +::: + +### `PermissionOp` definition + +:::{spec} +A permission operation `PermissionOp` requests specific permissions on a value: + +```ebnf +PermissionOp ::= ... +``` + +* {spec}`give` `.` `give` transfers ownership of the value. +* {spec}`share` `.` `share` creates a shared reference. +* {spec}`lease` `.` `mut` creates a mutable lease. +* {spec}`ref` `.` `ref` creates an immutable reference. +::: + +## `PrimaryExpr` definition + +:::{spec} +A primary expression `PrimaryExpr` is one of the following: + +```ebnf +PrimaryExpr ::= ... +``` + +* {spec}`literal-nt` A literal `Literal`. +* {spec}`identifier` An identifier `Identifier` referring to a variable or item in scope. +* {spec}`self` The keyword `self`, referring to the receiver of the current method. +* {spec}`if-expr-nt` An if expression `IfExpr`. +* {spec}`return-expr-nt` A return expression `ReturnExpr`. +* {spec}`constructor-expr-nt` A constructor expression `ConstructorExpr`. +* {spec}`paren-expr` A parenthesized expression `(` Expr `)`. +* {spec}`block-expr` A block expression `Block`. +::: + +### `IfExpr` definition + +:::{spec} +An if expression `IfExpr` evaluates a condition and executes a block: + +```ebnf +IfExpr ::= `if` Expr Block (`else` `if` Expr Block)* (`else` Block)? +``` +::: + +:::{spec} else +An `if` expression may have an `else` clause. 
+::: + +:::{spec} else-if +Multiple conditions may be chained with `else if`. +::: + +### `ReturnExpr` definition + +:::{spec} +A return expression `ReturnExpr` exits the enclosing function, +optionally with a value. +The value, if present, must appear on the same line as `return`: + +```ebnf +ReturnExpr ::= `return` Expr? +``` +::: + +### `ConstructorExpr` definition + +:::{spec} +A constructor expression `ConstructorExpr` creates a new instance +of a class or struct. +The opening brace must appear on the same line as the type name: + +```ebnf +ConstructorExpr ::= Identifier `{` ConstructorField,* `}` +ConstructorField ::= Identifier `:` Expr +``` +::: diff --git a/spec/src/syntax/items.md b/spec/src/syntax/items.md new file mode 100644 index 00000000..8c7707ca --- /dev/null +++ b/spec/src/syntax/items.md @@ -0,0 +1,247 @@ +# Items + +This chapter specifies the top-level items that can appear in a Dada source file. + +## Source Files + +:::{spec} +A Dada source file defines a module. +The module name is derived from the file name. +A source file contains zero or more items, +optionally followed by zero or more statements: + +```ebnf +SourceFile ::= Item* Statement* +``` +::: + +:::{spec} implicit-main +If a source file contains top-level statements, +they are wrapped in an implicit `async fn main()` function. +::: + +:::{spec} kinds +An item `Item` is one of the following: + +```ebnf +Item ::= ... +``` + +* {spec}`function-nt` A function `Function`. +* {spec}`class-nt` A class `Class`. +* {spec}`struct-nt` A struct `Struct`. +* {spec}`use-declaration-nt` A use declaration `UseDeclaration`. +::: + +## `Visibility` definition + +:::{spec} +Items and fields may have a visibility modifier. +Without a modifier, the item is private to the enclosing module. + +```ebnf +Visibility ::= ... | ε +``` + +* {spec}`pub` `pub` makes the item visible within the crate. +* {spec}`export` `export` makes the item visible outside the crate. 
+::: + +## `Function` definition + +:::{spec} +A function `Function` is declared with the `fn` keyword, +optionally preceded by effect keywords +and followed by a name, optional generic parameters, +parameters, optional return type, optional where clause, +and a body or semicolon: + +```ebnf +Function ::= Visibility Effect* `fn` Identifier GenericParameters? + `(` Parameters `)` ReturnType? WhereClause? FunctionBody +``` +::: + +### `Effect` definition + +:::{spec} +Effect keywords may appear in any order before `fn`: + +```ebnf +Effect ::= ... +``` + +* {spec}`async` `async` declares an asynchronous function. +* {spec}`unsafe` `unsafe` declares an unsafe function. +::: + +### `Parameters` definition + +:::{spec} +Function parameters are enclosed in parentheses and separated by commas: + +```ebnf +Parameters ::= FunctionInput,* +FunctionInput ::= SelfParameter | Parameter +``` +::: + +:::{spec} self +A function may have a `self` parameter as its first parameter, +optionally preceded by a permission keyword, +which makes it a method: + +```ebnf +SelfParameter ::= PermissionKeyword? `self` +``` +::: + +:::{spec} parameter-syntax +Each non-self parameter has the form `name: Type`. +A parameter may be preceded by `mut` +to declare a mutable binding: + +```ebnf +Parameter ::= `mut`? Identifier `:` Type +``` +::: + +### `FunctionBody` definition + +:::{spec} +A function may have a body, which is a block enclosed in curly braces. +If no body is present, the function has no definition. + +```ebnf +FunctionBody ::= Block | ε +``` +::: + +### `ReturnType` definition + +:::{spec} +A function may declare a return type with `->` followed by a `Type` after the parameters. 
+ +```ebnf +ReturnType ::= `->` Type +``` +::: + +### `GenericParameters` definition + +:::{spec} +A function may declare generic parameters in square brackets after the name: + +```ebnf +GenericParameters ::= `[` GenericParameter,* `]` +GenericParameter ::= `type` Identifier | `perm` Identifier +``` + +* {spec}`type-parameters` A type parameter `type` followed by a name: `type T`. +* {spec}`permission-parameters` A permission parameter `perm` followed by a name: `perm P`. +::: + +### `WhereClause` definition + +:::{spec} +A function may have a `where` clause after the return type +that constrains its generic parameters: + +```ebnf +WhereClause ::= `where` WhereConstraint,+ +WhereConstraint ::= Type `is` WhereKind (`+` WhereKind)* +WhereKind ::= ... +``` + +* {spec}`ref` `ref` +* {spec}`mut` `mut` +* {spec}`shared` `shared` +* {spec}`unique` `unique` +* {spec}`owned` `owned` +* {spec}`lent` `lent` +::: + +## `Class` definition + +:::{spec} +A class `Class` is declared with the `class` keyword. +Classes have reference semantics. + +```ebnf +Class ::= Visibility `class` Identifier GenericParameters? + ConstructorFields? WhereClause? ClassBody? +``` +::: + +### `ConstructorFields` definition + +:::{spec} +A class may declare constructor fields in parentheses after the name: + +```ebnf +ConstructorFields ::= `(` Field,* `)` +``` +::: + +### `ClassBody` definition + +:::{spec} +A class body enclosed in curly braces may contain field declarations and method definitions: + +```ebnf +ClassBody ::= `{` ClassMember* `}` +ClassMember ::= ... +``` + +* {spec}`field-nt` A field declaration `Field`. +* {spec}`method-nt` A method `Method`. +::: + +### `Method` definition + +:::{spec} +A method `Method` is a function declared inside a class or struct body: + +```ebnf +Method ::= Function +``` +::: + +### `Field` definition + +:::{spec} field-syntax +A field declaration `Field` has the form: + +```ebnf +Field ::= Visibility `mut`? 
Identifier `:` Type +``` +::: + +### Generics and Where Clauses + +:::{spec} +Classes support generic parameters and where clauses +with the same syntax as functions. +::: + +## `Struct` definition + +:::{spec} +A struct `Struct` is declared with the `struct` keyword. +The syntax is identical to `Class` but structs have value semantics. + +```ebnf +Struct ::= Visibility `struct` Identifier GenericParameters? + ConstructorFields? WhereClause? ClassBody? +``` +::: + +## `UseDeclaration` definition + +:::{spec} +A `use` declaration `UseDeclaration` imports a name from another crate, +optionally renaming it with `as`: + +```ebnf +UseDeclaration ::= `use` Path (`as` Identifier)? +Path ::= Identifier (`.` Identifier)* +``` +::: diff --git a/spec/src/syntax/lexical-structure.md b/spec/src/syntax/lexical-structure.md new file mode 100644 index 00000000..745f8c62 --- /dev/null +++ b/spec/src/syntax/lexical-structure.md @@ -0,0 +1,226 @@ +# Lexical Structure + +This chapter specifies the lexical structure of Dada programs. +A Dada source file is a sequence of Unicode characters, +which the lexer converts into a sequence of tokens. + +## Source Encoding + +:::{spec} +Dada source files are encoded as UTF-8. +::: + +## Tokens + +:::{spec} +The lexer produces a sequence of tokens: + +```ebnf +Token ::= ... +``` + +A token `Token` is one of the following kinds: + +* {spec}`identifier-nt` An identifier `Identifier`. +* {spec}`keyword-nt` A keyword `Keyword`. +* {spec}`literal-nt` A literal `Literal` (integer, string, or boolean). +* {spec}`operator-nt` A single punctuation or operator character `Operator`. +* {spec}`delimiter-nt` A delimited group `Delimiter`: matched pair of brackets and their contents. +::: + +:::{spec} preceding-whitespace +Each token records whether it was preceded by whitespace, a newline, or a comment. +This information is used by the parser but does not produce separate tokens. 
+::: + +## Whitespace and Comments + +### Whitespace + +:::{spec} +Whitespace characters (spaces, tabs, and other Unicode whitespace excluding newlines) +separate tokens but are otherwise not significant. +::: + +:::{spec} newlines +Newline characters (`\n`) are tracked by the lexer. +Whether a token is preceded by a newline +may affect how the parser interprets certain constructs. +::: + +### Comments + +:::{spec} +A comment begins with `#` and extends to the end of the line. +::: + +:::{spec} content +The content of a comment, including the leading `#`, is ignored by the lexer. +A comment implies a newline for the purpose of preceding-whitespace tracking. +::: + +## `Identifier` definition + +:::{spec} +An identifier `Identifier` begins with a Unicode alphabetic character or underscore (`_`), +followed by zero or more Unicode alphanumeric characters or underscores, +provided it is not a keyword `Keyword`: + +```ebnf +Identifier ::= (Alphabetic | `_`) (Alphanumeric | `_`)* (not a Keyword) +``` +::: + +:::{spec} case-sensitivity +Identifiers are case-sensitive. +::: + +## `Keyword` definition + +:::{spec} +The following words are reserved as keywords: + +```ebnf +Keyword ::= ... 
+``` + +* {spec}`as` `as` +* {spec}`async` `async` +* {spec}`await` `await` +* {spec}`class` `class` +* {spec}`else` `else` +* {spec}`enum` `enum` +* {spec}`export` `export` +* {spec}`false` `false` +* {spec}`fn` `fn` +* {spec}`give` `give` +* {spec}`given` `given` +* {spec}`if` `if` +* {spec}`is` `is` +* {spec}`let` `let` +* {spec}`match` `match` +* {spec}`mod` `mod` +* {spec}`mut` `mut` +* {spec}`my` `my` +* {spec}`our` `our` +* {spec}`perm` `perm` +* {spec}`pub` `pub` +* {spec}`ref` `ref` +* {spec}`return` `return` +* {spec}`self` `self` +* {spec}`share` `share` +* {spec}`shared` `shared` +* {spec}`struct` `struct` +* {spec}`true` `true` +* {spec}`type` `type` +* {spec}`unsafe` `unsafe` +* {spec}`use` `use` +* {spec}`where` `where` +::: + +## `Operator` definition + +:::{spec} +The following single characters are recognized as operator tokens: + +```ebnf +Operator ::= `+` | `-` | `*` | `/` | `%` | `=` | `!` + | `<` | `>` | `&` | `|` | `:` | `,` | `.` | `;` | `?` +``` + +* {spec}`plus` `+` +* {spec}`minus` `-` +* {spec}`star` `*` +* {spec}`slash` `/` +* {spec}`percent` `%` +* {spec}`equals` `=` +* {spec}`bang` `!` +* {spec}`less-than` `<` +* {spec}`greater-than` `>` +* {spec}`ampersand` `&` +* {spec}`pipe` `|` +* {spec}`colon` `:` +* {spec}`comma` `,` +* {spec}`dot` `.` +* {spec}`semicolon` `;` +* {spec}`question` `?` +::: + +:::{spec} multi-character +Multi-character operators such as `&&`, `||`, `==`, `<=`, `>=`, and `->` +are formed by the parser from adjacent operator tokens. +::: + +## `Delimiter` definition + +:::{spec} +A delimited token contains a matched pair of brackets and their contents: + +```ebnf +Delimiter ::= `(` Token* `)` | `[` Token* `]` | `{` Token* `}` +``` + +* {spec}`parentheses` Parentheses: `(` and `)`. +* {spec}`square-brackets` Square brackets: `[` and `]`. +* {spec}`curly-braces` Curly braces: `{` and `}`. +::: + +:::{spec} balanced +Delimiters must be balanced. +An opening delimiter without a matching closing delimiter is an error. 
+::: + +:::{spec} nesting +The lexer tracks delimiter nesting. +Content between matching delimiters is treated as a unit, +which enables deferred parsing of function bodies and other nested structures. +::: + +## `Literal` definition + +:::{spec} +A literal `Literal` is one of the following: + +```ebnf +Literal ::= ... +``` + +* {spec}`integer-literal-nt` An integer literal `IntegerLiteral`. +* {spec}`boolean-literal-nt` A boolean literal `BooleanLiteral`. +* {spec}`string-literal-nt` A string literal `StringLiteral`. +::: + +### `IntegerLiteral` definition + +:::{spec} +An integer literal `IntegerLiteral` is a sequence of one or more ASCII decimal digits (`0`–`9`), +optionally separated by underscores (`_`) that do not affect the value: + +```ebnf +IntegerLiteral ::= Digit (`_`? Digit)* +Digit ::= `0` | `1` | ... | `9` +``` +::: + +### `BooleanLiteral` definition + +:::{spec} +The keywords `true` and `false` are boolean literals: + +```ebnf +BooleanLiteral ::= `true` | `false` +``` +::: + +### `StringLiteral` definition + +:::{spec} +String literal syntax is specified in [String Literals](string-literals.md). +::: + +## Lexical Errors + +:::{spec} +Characters that do not begin a valid token are accumulated +and reported as a single error spanning the invalid sequence. +::: diff --git a/spec/src/syntax/literals.md b/spec/src/syntax/literals.md index 3605ee16..e8ca43e5 100644 --- a/spec/src/syntax/literals.md +++ b/spec/src/syntax/literals.md @@ -4,11 +4,13 @@ This chapter describes literal expressions in Dada. ## Numeric Literals -*To be specified* +See [Integer Literals](lexical-structure.md#integerliteral-definition) for lexical syntax. + +*Numeric type inference and overflow behavior to be specified.* ## Boolean Literals -*To be specified* +See [Boolean Literals](lexical-structure.md#booleanliteral-definition) for lexical syntax. 
## String Literals diff --git a/spec/src/syntax/statements.md b/spec/src/syntax/statements.md new file mode 100644 index 00000000..6ea72bc4 --- /dev/null +++ b/spec/src/syntax/statements.md @@ -0,0 +1,67 @@ +# Statements + +This chapter specifies the statement syntax of Dada. + +## `Block` definition + +:::{spec} +A block `Block` is a sequence of zero or more statements +enclosed in curly braces: + +```ebnf +Block ::= `{` Statement* `}` +``` +::: + +:::{spec} value +A block evaluates to the value of its last expression, +if the last statement is an expression statement. +::: + +## `Statement` definition + +:::{spec} +A statement `Statement` is one of the following: + +```ebnf +Statement ::= ... +``` + +* {spec}`let-statement-nt` A let statement `LetStatement`. +* {spec}`expr-statement-nt` An expression statement `ExprStatement`. +::: + +## `LetStatement` definition + +:::{spec} +A let statement `LetStatement` introduces a new variable binding: + +```ebnf +LetStatement ::= `let` `mut`? Identifier (`:` Type)? (`=` Expr)? +``` +::: + +:::{spec} type-annotation +A `let` statement may include a type annotation: `let name: Type = value`. +::: + +:::{spec} mutable +A `let` statement may use `mut` to declare a mutable binding: +`let mut name = value`. +::: + +:::{spec} initializer-optional +The initializer (`= value`) is optional. +A variable may be declared without an initial value. +::: + +## `ExprStatement` definition + +:::{spec} +An expression statement `ExprStatement` is an expression +followed by a newline or end of block: + +```ebnf +ExprStatement ::= Expr +``` +::: diff --git a/spec/src/syntax/string-literals.md b/spec/src/syntax/string-literals.md index 3391c8b5..aca47281 100644 --- a/spec/src/syntax/string-literals.md +++ b/spec/src/syntax/string-literals.md @@ -2,16 +2,135 @@ This chapter specifies string literal syntax in Dada. 
-## Basic String Literals +## Delimiters -:::{spec} syntax.string-literals.basic -A basic string literal is delimited by double quotes (`"`). +:::{spec} rfc0001 +There are multiple forms of string literals: + +* {spec}`quoted` Single-quoted string literals begin with a `"` and end with a `"`. +* {spec}`triple-quoted` Triple-quoted string literals begin with a `"""` and end with a `"""`. +::: + +:::{spec} disambiguation rfc0001 +The syntax `"""` is interpreted as the start of a triple-quoted string literal +and not a single-quoted string literal followed by the start of another single-quoted string literal. +::: + +:::{spec} triple-quote-termination rfc0001 +A triple-quoted string literal cannot contain three consecutive unescaped double-quote characters. +::: + +## Type + +:::{spec} rfc0001 +String literals have type `my String`. ::: ## Escape Sequences -*To be specified* +:::{spec} rfc0001 +String literals support the following escape sequences: + +* {spec}`backslash` `\\` produces a literal backslash. +* {spec}`double-quote` `\"` produces a literal double quote. +* {spec}`newline` `\n` produces a newline. +* {spec}`carriage-return` `\r` produces a carriage return. +* {spec}`tab` `\t` produces a tab. +* {spec}`open-brace` `\{` produces a literal `{`. +* {spec}`close-brace` `\}` produces a literal `}`. +::: + +:::{spec} triple-quoted rfc0001 +The `\"` escape sequence is not needed in triple-quoted strings, +since embedded double quotes do not terminate the string. +::: + +:::{spec} invalid rfc0001 +A `\` followed by a character not listed above is an error. +::: + +## Interpolation + +:::{spec} rfc0001 unimpl +String literals may contain interpolation expressions +delimited by curly braces (`{` and `}`). +Any valid Dada expression may appear inside the braces. +::: + +:::{spec} brace-escaping rfc0001 +Literal brace characters are produced by the `\{` and `\}` escape sequences. 
+::: + +:::{spec} nesting rfc0001 unimpl +The lexer tracks brace nesting depth, +so that braces within interpolated expressions (e.g., block expressions, struct literals) +do not prematurely terminate the interpolation. +::: + +:::{spec} nested-quotes rfc0001 unimpl +Quotes inside interpolated expressions do not terminate the enclosing string literal. +::: + +:::{spec} scope rfc0001 unimpl +Interpolated expressions are evaluated at runtime in the enclosing scope. +::: + +:::{spec} order rfc0001 unimpl +Interpolated expressions are evaluated left-to-right. +::: + +:::{spec} type-check rfc0001 unimpl +Each interpolated expression must produce a value that can be converted to a string. +This is checked at compile time. +::: + +:::{spec} permissions rfc0001 unimpl +The permission system applies normally to interpolated expressions. +::: + +## Multiline Strings + +:::{spec} rfc0001 +A string literal that begins with a newline immediately after the opening quote +(either `"` or `"""`) is a multiline string literal +with automatic indentation handling. +::: + +:::{spec} leading-newline rfc0001 +The leading newline immediately after the opening quote is removed. +::: + +:::{spec} trailing-whitespace rfc0001 +The trailing newline immediately before the closing quote is removed, +along with any whitespace on the final line. +::: + +:::{spec} dedenting rfc0001 +The common whitespace prefix across all non-empty lines is removed +from the start of each line. +::: + +:::{spec} escape-sequences-are-content rfc0001 +Escape sequences are part of the string content, not whitespace. +They are not affected by leading/trailing stripping or dedenting. +::: + +:::{spec} raw rfc0001 +A string literal beginning with `"\` followed by a newline +disables automatic dedenting. +The string preserves its content exactly as written, +including the leading newline and all indentation. 
+::: + +## String Conversion + +:::{spec} rfc0001 unimpl +Interpolated expressions must produce values that can be converted to strings. +The exact conversion mechanism is not yet defined +and depends on Dada's trait/interface system. +::: -## Interpolated Strings +## Implementation Notes -*To be specified - see RFC-0001* \ No newline at end of file +> A string literal with no interpolation expressions can be compiled +> as a simple string constant with no runtime overhead. diff --git a/spec/src/syntax/types-and-permissions.md b/spec/src/syntax/types-and-permissions.md new file mode 100644 index 00000000..9ef28db2 --- /dev/null +++ b/spec/src/syntax/types-and-permissions.md @@ -0,0 +1,74 @@ +# Types and Permissions + +This chapter specifies the syntax for types and permissions in Dada. + +## Types + +### Named Types + +:::{spec} +A type may be a simple name: `String`, `i32`, `bool`. +::: + +:::{spec} paths +A type may be a dotted path: `module.Type`. +::: + +### Generic Application + +:::{spec} +A type may be applied to generic arguments in square brackets: +`Vec[String]`, `Pair[i32, bool]`. +::: + +### Permission-Qualified Types + +:::{spec} +A type may be preceded by a permission to form a permission-qualified type: +`my String`, `ref Point`, `mut Vec[i32]`. +::: + +## Permissions + +:::{spec} +The following permission keywords are available: + +* {spec}`my` `my` — exclusive ownership. +* {spec}`our` `our` — shared ownership. +* {spec}`ref` `ref` — immutable reference. +* {spec}`mut` `mut` — mutable reference. +* {spec}`given` `given` — a permission supplied by the caller. +::: + +### Place Lists + +:::{spec} +The permissions `ref`, `mut`, and `given` may include a place list +in square brackets specifying which places they refer to: +`ref[x, y]`, `mut[self]`, `given[p]`. +::: + +:::{spec} place-list-optional +The place list is optional. +When omitted, the permission applies without place restrictions. 
+::: + +## Generic Declarations + +### In Type Position + +:::{spec} +A generic type parameter is declared as `type T`. +::: + +:::{spec} permission-declaration +A generic permission parameter is declared as `perm P`. +::: + +### Ambiguity + +:::{spec} +A single identifier in a generic position is ambiguous +between a type and a permission. +The ambiguity is resolved during type checking, not parsing. +::: diff --git a/tests/adhoc/ast_probe.dada b/tests/adhoc/ast_probe.dada new file mode 100644 index 00000000..c042e010 --- /dev/null +++ b/tests/adhoc/ast_probe.dada @@ -0,0 +1,11 @@ +#:skip_codegen + +async fn main() { + # Test Ast probe on a string literal + print("hello").await + #? ^^^^^^^ Ast: Literal(String, "hello") + + # Test Ast probe on a function call + print("hello").await + #? ^^^^^^^^^ Ast: ParenthesisOp(Id(print), [Literal(String, "hello")]) +} diff --git a/tests/class_inputs.dada b/tests/adhoc/class_inputs.dada similarity index 100% rename from tests/class_inputs.dada rename to tests/adhoc/class_inputs.dada diff --git a/tests/escape_sequences.dada b/tests/adhoc/escape_sequences.dada similarity index 100% rename from tests/escape_sequences.dada rename to tests/adhoc/escape_sequences.dada diff --git a/tests/hello_world.dada b/tests/adhoc/hello_world.dada similarity index 100% rename from tests/hello_world.dada rename to tests/adhoc/hello_world.dada diff --git a/tests/adhoc/test_spec_parsing.dada b/tests/adhoc/test_spec_parsing.dada new file mode 100644 index 00000000..c2ecc12e --- /dev/null +++ b/tests/adhoc/test_spec_parsing.dada @@ -0,0 +1,4 @@ +#:spec syntax.string-literals.delimiters.quoted +#:skip_codegen + +print("hello") \ No newline at end of file diff --git a/tests/syntax/expressions/newline_sensitivity.dada b/tests/syntax/expressions/newline_sensitivity.dada new file mode 100644 index 00000000..7cac61da --- /dev/null +++ b/tests/syntax/expressions/newline_sensitivity.dada @@ -0,0 +1,20 @@ +#:spec syntax.expressions.newline-sensitivity 
+#:skip_codegen +#:FIXME_ICE + +fn main() { + # binary op on same line: parsed as one expression + let a = 1 + 2 + #? ^^^^^ Ast: BinaryOp(+, Literal(Integer, "1"), Literal(Integer, "2")) + set(a) + + # operator on new line: parsed as separate expressions + # (the second line is a new expression starting with +) + let b = 1 + #? ^ Ast: Literal(Integer, "1") + + 2 + set(b) +} + +fn set(x: u32) { +} diff --git a/tests/syntax/expressions/operator_precedence/arithmetic.dada b/tests/syntax/expressions/operator_precedence/arithmetic.dada new file mode 100644 index 00000000..5db35182 --- /dev/null +++ b/tests/syntax/expressions/operator_precedence/arithmetic.dada @@ -0,0 +1,47 @@ +#:spec syntax.expressions.addexpr-definition +#:skip_codegen + +fn main() { + # addition + let a = 1 + 2 + #? ^^^^^ Ast: BinaryOp(+, Literal(Integer, "1"), Literal(Integer, "2")) + set(a) + + # subtraction + let b = 5 - 3 + #? ^^^^^ Ast: BinaryOp(-, Literal(Integer, "5"), Literal(Integer, "3")) + set(b) + + # multiplication + let c = 2 * 3 + #? ^^^^^ Ast: BinaryOp(*, Literal(Integer, "2"), Literal(Integer, "3")) + set(c) + + # division + let d = 6 / 2 + #? ^^^^^ Ast: BinaryOp(/, Literal(Integer, "6"), Literal(Integer, "2")) + set(d) + + # mul binds tighter than add: 1 + 2 * 3 = 1 + (2 * 3) + let e = 1 + 2 * 3 + #? ^^^^^^^^^ Ast: BinaryOp(+, Literal(Integer, "1"), BinaryOp(*, Literal(Integer, "2"), Literal(Integer, "3"))) + set(e) + + # right-associative: 1 + 2 + 3 = 1 + (2 + 3) + let f = 1 + 2 + 3 + #? ^^^^^^^^^ Ast: BinaryOp(+, Literal(Integer, "1"), BinaryOp(+, Literal(Integer, "2"), Literal(Integer, "3"))) + set(f) + + # right-associative: 6 / 3 / 2 = 6 / (3 / 2) + let g = 6 / 3 / 2 + #? ^^^^^^^^^ Ast: BinaryOp(/, Literal(Integer, "6"), BinaryOp(/, Literal(Integer, "3"), Literal(Integer, "2"))) + set(g) + + # mixed: 1 * 2 + 3 * 4 = (1 * 2) + (3 * 4) + let h = 1 * 2 + 3 * 4 + #? 
^^^^^^^^^^^^^ Ast: BinaryOp(+, BinaryOp(*, Literal(Integer, "1"), Literal(Integer, "2")), BinaryOp(*, Literal(Integer, "3"), Literal(Integer, "4"))) + set(h) +} + +fn set(x: u32) { +} diff --git a/tests/syntax/expressions/operator_precedence/assignment.dada b/tests/syntax/expressions/operator_precedence/assignment.dada new file mode 100644 index 00000000..05407c60 --- /dev/null +++ b/tests/syntax/expressions/operator_precedence/assignment.dada @@ -0,0 +1,20 @@ +#:spec syntax.expressions.assignexpr-definition +#:skip_codegen +#:FIXME + +fn main() { + # simple assignment + let mut x = 0 + x = 42 +#? ^^^^^^ Ast: BinaryOp(=, Id(x), Literal(Integer, "42")) + set(x) + + # assignment binds tighter than addition: y = 1 + 2 parses as (y = 1) + 2 + let mut y = 0 + y = 1 + 2 +#? ^^^^^^^^^ Ast: BinaryOp(+, BinaryOp(=, Id(y), Literal(Integer, "1")), Literal(Integer, "2")) + set(y) +} + +fn set(x: u32) { +} diff --git a/tests/syntax/expressions/operator_precedence/comparison.dada b/tests/syntax/expressions/operator_precedence/comparison.dada new file mode 100644 index 00000000..13f5ade2 --- /dev/null +++ b/tests/syntax/expressions/operator_precedence/comparison.dada @@ -0,0 +1,38 @@ +#:spec syntax.expressions.compareexpr-definition +#:skip_codegen +#:FIXME_ICE + +fn main() { + # equal + let a = 1 == 2 + #? ^^^^^^ Ast: BinaryOp(==, Literal(Integer, "1"), Literal(Integer, "2")) + check(a) + + # less than + let b = 1 < 2 + #? ^^^^^ Ast: BinaryOp(<, Literal(Integer, "1"), Literal(Integer, "2")) + check(b) + + # greater than + let c = 1 > 2 + #? ^^^^^ Ast: BinaryOp(>, Literal(Integer, "1"), Literal(Integer, "2")) + check(c) + + # less than or equal + let d = 1 <= 2 + #? ^^^^^^ Ast: BinaryOp(<=, Literal(Integer, "1"), Literal(Integer, "2")) + check(d) + + # greater than or equal + let e = 1 >= 2 + #? ^^^^^^ Ast: BinaryOp(>=, Literal(Integer, "1"), Literal(Integer, "2")) + check(e) + + # comparison has lower precedence than addition: 1 + 2 == 3 + let f = 1 + 2 == 3 + #? 
^^^^^^^^^^ Ast: BinaryOp(==, BinaryOp(+, Literal(Integer, "1"), Literal(Integer, "2")), Literal(Integer, "3")) + check(f) +} + +fn check(x: bool) { +} diff --git a/tests/syntax/expressions/operator_precedence/logical.dada b/tests/syntax/expressions/operator_precedence/logical.dada new file mode 100644 index 00000000..6dc8abb9 --- /dev/null +++ b/tests/syntax/expressions/operator_precedence/logical.dada @@ -0,0 +1,27 @@ +#:spec syntax.expressions.orexpr-definition +#:skip_codegen + +fn main() { + # logical or + let a = true || false + #? ^^^^^^^^^^^^^ Ast: BinaryOp(||, Literal(Boolean, "true"), Literal(Boolean, "false")) + check(a) + + # logical and + let b = true && false + #? ^^^^^^^^^^^^^ Ast: BinaryOp(&&, Literal(Boolean, "true"), Literal(Boolean, "false")) + check(b) + + # or binds tighter than and: a || b && c = (a || b) && c + let c = true || false && true + #? ^^^^^^^^^^^^^^^^^^^^^ Ast: BinaryOp(&&, BinaryOp(||, Literal(Boolean, "true"), Literal(Boolean, "false")), Literal(Boolean, "true")) + check(c) + + # right-associative: a && b && c = a && (b && c) + let d = true && false && true + #? ^^^^^^^^^^^^^^^^^^^^^ Ast: BinaryOp(&&, Literal(Boolean, "true"), BinaryOp(&&, Literal(Boolean, "false"), Literal(Boolean, "true"))) + check(d) +} + +fn check(x: bool) { +} diff --git a/tests/syntax/expressions/postfix/await.dada b/tests/syntax/expressions/postfix/await.dada new file mode 100644 index 00000000..b0f752fb --- /dev/null +++ b/tests/syntax/expressions/postfix/await.dada @@ -0,0 +1,8 @@ +#:spec syntax.expressions.postfixexpr-definition.await-definition +#:skip_codegen + +async fn main() { + # the Await probe on the whole expression + print("hello").await + #? 
^^^^^^^^^^^^^^ Ast: Await(ParenthesisOp(Id(print), [Literal(String, "hello")])) +} diff --git a/tests/syntax/expressions/postfix/calls.dada b/tests/syntax/expressions/postfix/calls.dada new file mode 100644 index 00000000..83a85491 --- /dev/null +++ b/tests/syntax/expressions/postfix/calls.dada @@ -0,0 +1,16 @@ +#:spec syntax.expressions.postfixexpr-definition.call-definition +#:skip_codegen + +async fn main() { + # function call with one argument + print("hello").await + #? ^^^^^^^^^^ Ast: ParenthesisOp(Id(print), [Literal(String, "hello")]) + + # function call with no arguments + greet().await + #? ^^^^^^^ Ast: Await(ParenthesisOp(Id(greet), [])) +} + +async fn greet() { + print("hi").await +} diff --git a/tests/syntax/expressions/postfix/field_access.dada b/tests/syntax/expressions/postfix/field_access.dada new file mode 100644 index 00000000..4b604b85 --- /dev/null +++ b/tests/syntax/expressions/postfix/field_access.dada @@ -0,0 +1,18 @@ +#:spec syntax.expressions.postfixexpr-definition.fieldaccess-definition +#:skip_codegen +#:FIXME_ICE + +class Point(x: u32, y: u32) + +fn main() { + let p = Point { x: 1, y: 2 } + p.x + #? ^^^ Ast: DotId(Id(p), x) + + # probe the receiver of the field access + p.x + #? ^ Ast: Id(p) +} + +fn set(x: u32) { +} diff --git a/tests/syntax/expressions/postfix/permission_ops.dada b/tests/syntax/expressions/postfix/permission_ops.dada new file mode 100644 index 00000000..4f77e826 --- /dev/null +++ b/tests/syntax/expressions/postfix/permission_ops.dada @@ -0,0 +1,31 @@ +#:spec syntax.expressions.postfixexpr-definition.permissionop-definition +#:skip_codegen +#:FIXME_ICE + +class Point(x: u32, y: u32) + +fn main() { + let p = Point { x: 1, y: 2 } + + # .give transfers ownership + p.give + #? ^^^^^^ Ast: PermissionOp(Give, Id(p)) + + let q = Point { x: 3, y: 4 } + + # .share creates a shared reference + q.share + #? ^^^^^^^ Ast: PermissionOp(Share, Id(q)) + + let r = Point { x: 5, y: 6 } + + # .mut creates a mutable lease + r.mut + #?
^^^^^ Ast: PermissionOp(Mutate, Id(r)) + + let s = Point { x: 7, y: 8 } + + # .ref creates an immutable reference + s.ref + #? ^^^^^ Ast: PermissionOp(Reference, Id(s)) +} diff --git a/tests/syntax/expressions/primary/constructor.dada b/tests/syntax/expressions/primary/constructor.dada new file mode 100644 index 00000000..e4a81cb1 --- /dev/null +++ b/tests/syntax/expressions/primary/constructor.dada @@ -0,0 +1,15 @@ +#:spec syntax.expressions.primaryexpr-definition.constructorexpr-definition +#:skip_codegen +#:FIXME_ICE + +class Point(x: u32, y: u32) + +fn main() { + # constructor with fields + Point { x: 1, y: 2 } + #? ^^^^^^^^^^^^^^^^^^^^^ Ast: Constructor(Point, [x: Literal(Integer, "1"), y: Literal(Integer, "2")]) + + # constructor with expressions + Point { x: 1 + 2, y: 3 * 4 } + #? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Ast: Constructor(Point, [x: BinaryOp(+, Literal(Integer, "1"), Literal(Integer, "2")), y: BinaryOp(*, Literal(Integer, "3"), Literal(Integer, "4"))]) +} diff --git a/tests/syntax/expressions/primary/identifier.dada b/tests/syntax/expressions/primary/identifier.dada new file mode 100644 index 00000000..9622f081 --- /dev/null +++ b/tests/syntax/expressions/primary/identifier.dada @@ -0,0 +1,12 @@ +#:spec syntax.expressions.primaryexpr-definition.identifier +#:skip_codegen + +fn main() { + let x = 42 + #? ^^ Ast: Literal(Integer, "42") + set(x) + #? ^ Ast: Id(x) +} + +fn set(x: u32) { +} diff --git a/tests/syntax/expressions/primary/if_expr.dada b/tests/syntax/expressions/primary/if_expr.dada new file mode 100644 index 00000000..211e01fc --- /dev/null +++ b/tests/syntax/expressions/primary/if_expr.dada @@ -0,0 +1,21 @@ +#:spec syntax.expressions.primaryexpr-definition.ifexpr-definition +#:skip_codegen + +fn main() { + # simple if + let x = if true { 1 } else { 2 } + #? 
^^^^^^^^^^^^^^^^^^^^^^^^ Ast: If([Literal(Boolean, "true") => Block([Literal(Integer, "1")]), else => Block([Literal(Integer, "2")])]) + set(x) + + # if-else-if + let y = if true { 1 } else if false { 2 } else { 3 } + #? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Ast: If([Literal(Boolean, "true") => Block([Literal(Integer, "1")]), Literal(Boolean, "false") => Block([Literal(Integer, "2")]), else => Block([Literal(Integer, "3")])]) + set(y) + + # if without else + if true { set(1) } +#? ^^^^^^^^^^^^^^^^^^ Ast: If([Literal(Boolean, "true") => Block([ParenthesisOp(Id(set), [Literal(Integer, "1")])])]) +} + +fn set(x: u32) { +} diff --git a/tests/syntax/expressions/primary/literals.dada b/tests/syntax/expressions/primary/literals.dada new file mode 100644 index 00000000..675b763a --- /dev/null +++ b/tests/syntax/expressions/primary/literals.dada @@ -0,0 +1,28 @@ +#:spec syntax.expressions.primaryexpr-definition.literal-nt +#:skip_codegen + +async fn main() { + # string literal + print("hello").await + #? ^^^^^^^ Ast: Literal(String, "hello") + + # integer literal + let x = 42 + #? ^^ Ast: Literal(Integer, "42") + set(x) + + # boolean literals + let t = true + #? ^^^^ Ast: Literal(Boolean, "true") + check(t) + + let f = false + #? 
^^^^^ Ast: Literal(Boolean, "false") + check(f) +} + +fn set(x: u32) { +} + +fn check(x: bool) { +} diff --git a/tests/syntax/expressions/primary/paren_expr.dada b/tests/syntax/expressions/primary/paren_expr.dada new file mode 100644 index 00000000..be358224 --- /dev/null +++ b/tests/syntax/expressions/primary/paren_expr.dada @@ -0,0 +1,16 @@ +#:spec syntax.expressions.primaryexpr-definition.paren-expr +#:skip_codegen +#:FIXME + +fn main() { + # parenthesized expression changes precedence + let x = (1 + 2) * 3 + set(x) + + # nested parentheses + let y = ((42)) + set(y) +} + +fn set(x: u32) { +} diff --git a/tests/syntax/expressions/primary/return_expr.dada b/tests/syntax/expressions/primary/return_expr.dada new file mode 100644 index 00000000..781181d6 --- /dev/null +++ b/tests/syntax/expressions/primary/return_expr.dada @@ -0,0 +1,20 @@ +#:spec syntax.expressions.primaryexpr-definition.returnexpr-definition +#:skip_codegen + +fn add(a: u32, b: u32) -> u32 { + return a + b +#? ^^^^^^^^^^^^ Ast: Return(BinaryOp(+, Id(a), Id(b))) +} + +fn nothing() { + return +#? ^^^^^^ Ast: Return +} + +fn main() { + let x = add(1, 2) + set(x) +} + +fn set(x: u32) { +} diff --git a/tests/syntax/expressions/unary/negate.dada b/tests/syntax/expressions/unary/negate.dada new file mode 100644 index 00000000..c71ea833 --- /dev/null +++ b/tests/syntax/expressions/unary/negate.dada @@ -0,0 +1,23 @@ +#:spec syntax.expressions.unaryexpr-definition.negate +#:skip_codegen +#:FIXME_ICE + +fn main() { + # arithmetic negation + let a = -42 + #? ^^^ Ast: UnaryOp(-, Literal(Integer, "42")) + set(a) + + # double negation + let b = --1 + #? ^^^ Ast: UnaryOp(-, UnaryOp(-, Literal(Integer, "1"))) + set(b) + + # negate binds tighter than binary ops: -1 + 2 = (-1) + 2 + let c = -1 + 2 + #? 
^^^^^^ Ast: BinaryOp(+, UnaryOp(-, Literal(Integer, "1")), Literal(Integer, "2")) + set(c) +} + +fn set(x: u32) { +} diff --git a/tests/syntax/expressions/unary/not.dada b/tests/syntax/expressions/unary/not.dada new file mode 100644 index 00000000..6e0e249f --- /dev/null +++ b/tests/syntax/expressions/unary/not.dada @@ -0,0 +1,16 @@ +#:spec syntax.expressions.unaryexpr-definition.not +#:skip_codegen + +fn main() { + # logical not + let a = !true + #? ^^^^^ Ast: UnaryOp(!, Literal(Boolean, "true")) + check(a) + + let b = !false + #? ^^^^^^ Ast: UnaryOp(!, Literal(Boolean, "false")) + check(b) +} + +fn check(x: bool) { +} diff --git a/tests/syntax/string_literals/delimiters/disambiguation.dada b/tests/syntax/string_literals/delimiters/disambiguation.dada new file mode 100644 index 00000000..e827e452 --- /dev/null +++ b/tests/syntax/string_literals/delimiters/disambiguation.dada @@ -0,0 +1,7 @@ +#:spec syntax.string-literals.delimiters.disambiguation +#:skip_codegen + +async fn main() { + # """ is a triple-quoted string, not "" followed by " + print("""hello""").await +} diff --git a/tests/syntax/string_literals/delimiters/quoted.dada b/tests/syntax/string_literals/delimiters/quoted.dada new file mode 100644 index 00000000..91307bf8 --- /dev/null +++ b/tests/syntax/string_literals/delimiters/quoted.dada @@ -0,0 +1,9 @@ +#:spec syntax.string-literals.delimiters.quoted +#:skip_codegen + +async fn main() { + print("hello").await + print("").await + print("a").await + print("hello world").await +} diff --git a/tests/syntax/string_literals/delimiters/triple_quote_termination.dada b/tests/syntax/string_literals/delimiters/triple_quote_termination.dada new file mode 100644 index 00000000..05dddb6a --- /dev/null +++ b/tests/syntax/string_literals/delimiters/triple_quote_termination.dada @@ -0,0 +1,13 @@ +#:spec syntax.string-literals.delimiters.triple-quote-termination +#:skip_codegen + +# A triple-quoted string cannot contain three consecutive unescaped +# double-quote 
characters — the first """ always terminates the string. +# So """hello"""world""" is parsed as the string "hello" followed by +# the identifier `world` and then an unterminated string. + +async fn main() { + print("""hello"""world""").await + #! ^^^^^ /extra input + #! /missing end quotes for triple-quoted string +} diff --git a/tests/syntax/string_literals/delimiters/triple_quoted.dada b/tests/syntax/string_literals/delimiters/triple_quoted.dada new file mode 100644 index 00000000..1ac63538 --- /dev/null +++ b/tests/syntax/string_literals/delimiters/triple_quoted.dada @@ -0,0 +1,10 @@ +#:spec syntax.string-literals.delimiters.triple-quoted +#:skip_codegen + +async fn main() { + print("""hello""").await + print("""""").await + print("""hello world""").await + print("""hello +world""").await +} diff --git a/tests/syntax/string_literals/escape_sequences/backslash.dada b/tests/syntax/string_literals/escape_sequences/backslash.dada new file mode 100644 index 00000000..10e21f78 --- /dev/null +++ b/tests/syntax/string_literals/escape_sequences/backslash.dada @@ -0,0 +1,8 @@ +#:spec syntax.string-literals.escape-sequences.backslash +#:skip_codegen + +async fn main() { + print("hello\\world").await + print("\\").await + print("\\\\").await +} diff --git a/tests/syntax/string_literals/escape_sequences/carriage_return.dada b/tests/syntax/string_literals/escape_sequences/carriage_return.dada new file mode 100644 index 00000000..1bf07d69 --- /dev/null +++ b/tests/syntax/string_literals/escape_sequences/carriage_return.dada @@ -0,0 +1,7 @@ +#:spec syntax.string-literals.escape-sequences.carriage-return +#:skip_codegen + +async fn main() { + print("hello\rworld").await + print("\r\n").await +} diff --git a/tests/syntax/string_literals/escape_sequences/close_brace.dada b/tests/syntax/string_literals/escape_sequences/close_brace.dada new file mode 100644 index 00000000..a8f14b78 --- /dev/null +++ b/tests/syntax/string_literals/escape_sequences/close_brace.dada @@ -0,0 +1,11 @@ 
+#:spec syntax.string-literals.escape-sequences.close-brace +#:skip_codegen + +# 💡 Using balanced \{ and \} in same string because the tokenizer's +# delimited() scanner doesn't skip string contents — unbalanced braces +# inside strings confuse brace-depth tracking. This is a known bug that +# will be fixed when implementing interpolation (which requires the same fix). +async fn main() { + print("\{\}").await + print("before\{middle\}after").await +} diff --git a/tests/syntax/string_literals/escape_sequences/double_quote.dada b/tests/syntax/string_literals/escape_sequences/double_quote.dada new file mode 100644 index 00000000..f70ef7a6 --- /dev/null +++ b/tests/syntax/string_literals/escape_sequences/double_quote.dada @@ -0,0 +1,8 @@ +#:spec syntax.string-literals.escape-sequences.double-quote +#:skip_codegen + +async fn main() { + print("\"").await + print("hello\"world").await + print("\"hello\"").await +} diff --git a/tests/syntax/string_literals/escape_sequences/invalid.dada b/tests/syntax/string_literals/escape_sequences/invalid.dada new file mode 100644 index 00000000..5ab5a177 --- /dev/null +++ b/tests/syntax/string_literals/escape_sequences/invalid.dada @@ -0,0 +1,11 @@ +#:spec syntax.string-literals.escape-sequences.invalid +#:skip_codegen + +async fn main() { + print("\a").await + #! ^ /invalid escape + print("\z").await + #! ^ /invalid escape + print("\1").await + #! 
^ /invalid escape +} diff --git a/tests/syntax/string_literals/escape_sequences/newline.dada b/tests/syntax/string_literals/escape_sequences/newline.dada new file mode 100644 index 00000000..2196edd6 --- /dev/null +++ b/tests/syntax/string_literals/escape_sequences/newline.dada @@ -0,0 +1,8 @@ +#:spec syntax.string-literals.escape-sequences.newline +#:skip_codegen + +async fn main() { + print("hello\nworld").await + print("\n").await + print("a\nb\nc").await +} diff --git a/tests/syntax/string_literals/escape_sequences/open_brace.dada b/tests/syntax/string_literals/escape_sequences/open_brace.dada new file mode 100644 index 00000000..a2e71a8d --- /dev/null +++ b/tests/syntax/string_literals/escape_sequences/open_brace.dada @@ -0,0 +1,11 @@ +#:spec syntax.string-literals.escape-sequences.open-brace +#:skip_codegen + +# 💡 Using balanced \{ and \} in same string because the tokenizer's +# delimited() scanner doesn't skip string contents — unbalanced braces +# inside strings confuse brace-depth tracking. This is a known bug that +# will be fixed when implementing interpolation (which requires the same fix). 
+async fn main() { + print("\{\}").await + print("hello\{world\}").await +} diff --git a/tests/syntax/string_literals/escape_sequences/tab.dada b/tests/syntax/string_literals/escape_sequences/tab.dada new file mode 100644 index 00000000..67fcb74a --- /dev/null +++ b/tests/syntax/string_literals/escape_sequences/tab.dada @@ -0,0 +1,8 @@ +#:spec syntax.string-literals.escape-sequences.tab +#:skip_codegen + +async fn main() { + print("hello\tworld").await + print("\t").await + print("col1\tcol2\tcol3").await +} diff --git a/tests/syntax/string_literals/escape_sequences/triple_quoted.dada b/tests/syntax/string_literals/escape_sequences/triple_quoted.dada new file mode 100644 index 00000000..a290b7ff --- /dev/null +++ b/tests/syntax/string_literals/escape_sequences/triple_quoted.dada @@ -0,0 +1,12 @@ +#:spec syntax.string-literals.escape-sequences.triple-quoted +#:skip_codegen + +async fn main() { + # \" is not needed in triple-quoted strings + print("""hello "world" goodbye""").await + # One or two embedded quotes are fine + print("""she said "hi" and "bye" """).await + print(""" "quoted" """).await + # Two consecutive quotes inside triple-quoted strings + print("""empty "" inside""").await +} diff --git a/tests/syntax/string_literals/interpolation/brace_escaping.dada b/tests/syntax/string_literals/interpolation/brace_escaping.dada new file mode 100644 index 00000000..90279560 --- /dev/null +++ b/tests/syntax/string_literals/interpolation/brace_escaping.dada @@ -0,0 +1,8 @@ +#:spec syntax.string-literals.interpolation.brace-escaping +#:skip_codegen + +async fn main() { + print("\{hello\}").await + print("\{\}").await + print("before\{middle\}after").await +} diff --git a/tests/syntax/string_literals/multiline_strings/dedenting.dada b/tests/syntax/string_literals/multiline_strings/dedenting.dada new file mode 100644 index 00000000..6d79d939 --- /dev/null +++ b/tests/syntax/string_literals/multiline_strings/dedenting.dada @@ -0,0 +1,36 @@ +#:spec 
syntax.string-literals.multiline-strings.dedenting +#:skip_codegen + +async fn main() { + # Common whitespace prefix is removed from all lines + let s = " + Line 1 + Line 2 + Line 3 + " + #? ^ Ast: Literal(String, "Line 1\nLine 2\nLine 3") + + # Nested indentation is preserved relative to the common prefix + let t = " + Header + Indented + More indented + " + #? ^ Ast: Literal(String, "Header\n Indented\n More indented") + + # Empty lines don't affect the common prefix calculation + let u = " + Line 1 + + Line 3 + " + #? ^ Ast: Literal(String, "Line 1\n\nLine 3") + + # Multiple lines with varying indentation + let v = " + a + b + c + " + #? ^ Ast: Literal(String, "a\n b\nc") +} diff --git a/tests/syntax/string_literals/multiline_strings/escape_sequences_are_content.dada b/tests/syntax/string_literals/multiline_strings/escape_sequences_are_content.dada new file mode 100644 index 00000000..5353391a --- /dev/null +++ b/tests/syntax/string_literals/multiline_strings/escape_sequences_are_content.dada @@ -0,0 +1,18 @@ +#:spec syntax.string-literals.multiline-strings.escape-sequences-are-content +#:skip_codegen + +async fn main() { + # Escape sequences are content, not whitespace — they survive stripping. + # A trailing \n escape before the closing quote line is preserved. + let s = " + Line 1 + Line 2\n + " + #? ^ Ast: Literal(String, "Line 1\nLine 2\n") + + # An escaped newline in the middle of content is preserved + let t = " + hello\nworld + " + #? 
^ Ast: Literal(String, "hello\nworld") +} diff --git a/tests/syntax/string_literals/multiline_strings/leading_newline.dada b/tests/syntax/string_literals/multiline_strings/leading_newline.dada new file mode 100644 index 00000000..3d87b044 --- /dev/null +++ b/tests/syntax/string_literals/multiline_strings/leading_newline.dada @@ -0,0 +1,16 @@ +#:spec syntax.string-literals.multiline-strings.leading-newline +#:skip_codegen + +async fn main() { + # The leading newline immediately after the opening quote is removed + let s = " + hello + " + #? ^ Ast: Literal(String, "hello") + + # Same with triple-quoted + let t = """ + hello + """ + #? ^^^ Ast: Literal(String, "hello") +} diff --git a/tests/syntax/string_literals/multiline_strings/multiline.dada b/tests/syntax/string_literals/multiline_strings/multiline.dada new file mode 100644 index 00000000..829d0f74 --- /dev/null +++ b/tests/syntax/string_literals/multiline_strings/multiline.dada @@ -0,0 +1,21 @@ +#:spec syntax.string-literals.multiline-strings +#:skip_codegen + +async fn main() { + # A string beginning with a newline after the opening quote is multiline. + # Leading newline, trailing whitespace, and common indentation are removed. + let s = " + hello world + " + #? ^ Ast: Literal(String, "hello world") + + # Works with triple-quoted strings too + let t = """ + hello world + """ + #? ^^^ Ast: Literal(String, "hello world") + + # A string NOT beginning with a newline is not multiline (no dedenting) + let u = "hello world" + #? 
^^^^^^^^^^^^^ Ast: Literal(String, "hello world") +} diff --git a/tests/syntax/string_literals/multiline_strings/raw.dada b/tests/syntax/string_literals/multiline_strings/raw.dada new file mode 100644 index 00000000..b86e50cd --- /dev/null +++ b/tests/syntax/string_literals/multiline_strings/raw.dada @@ -0,0 +1,27 @@ +#:spec syntax.string-literals.multiline-strings.raw +#:skip_codegen + +async fn main() { + # Raw string: "\ prefix disables dedenting + # The leading newline is preserved, indentation is preserved exactly + let s = "\ + hello + world + " + #? ^ Ast: Literal(String, "\n hello\n world\n ") + + # Compare: without raw prefix, dedenting occurs + let t = " + hello + world + " + #? ^ Ast: Literal(String, "hello\nworld") + + # Raw string with varying indentation — all preserved exactly + let u = "\ + a + b + c + " + #? ^ Ast: Literal(String, "\n a\n b\n c\n ") +} diff --git a/tests/syntax/string_literals/multiline_strings/trailing_whitespace.dada b/tests/syntax/string_literals/multiline_strings/trailing_whitespace.dada new file mode 100644 index 00000000..e018f482 --- /dev/null +++ b/tests/syntax/string_literals/multiline_strings/trailing_whitespace.dada @@ -0,0 +1,20 @@ +#:spec syntax.string-literals.multiline-strings.trailing-whitespace +#:skip_codegen + +async fn main() { + # The trailing newline and whitespace before the closing quote are removed. + # Closing quote indentation does not affect string content. + + # Standard case: trailing newline + spaces stripped + let s = " + hello + " + #? ^ Ast: Literal(String, "hello") + + # Multiple content lines: trailing line stripped + let t = " + hello + world + " + #? ^ Ast: Literal(String, "hello\nworld") +} diff --git a/tests/syntax/string_literals/type.dada b/tests/syntax/string_literals/type.dada new file mode 100644 index 00000000..967fab21 --- /dev/null +++ b/tests/syntax/string_literals/type.dada @@ -0,0 +1,7 @@ +#:spec syntax.string-literals.type +#:skip_codegen + +fn main() { + let x = "hello" + #?
^ VariableType: String +} diff --git a/tests/test_spec_parsing.dada b/tests/test_spec_parsing.dada deleted file mode 100644 index 32b8c20d..00000000 --- a/tests/test_spec_parsing.dada +++ /dev/null @@ -1,4 +0,0 @@ -#:spec syntax.string-literals.basic -#:skip_codegen - -print("hello") \ No newline at end of file