diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4f71789..020cc1f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,10 +41,20 @@ jobs: run: npm ci - name: Run standards checks - run: npm run standards - env: - # For PRs: lint from base branch. For pushes: lint from previous commit - COMMITLINT_FROM: ${{ github.event_name == 'pull_request' && github.event.pull_request.base.sha || github.event.before }} + run: | + # Determine commitlint range + if [ "${{ github.event_name }}" = "pull_request" ]; then + export COMMITLINT_FROM="${{ github.event.pull_request.base.sha }}" + else + # For pushes, verify the before commit exists + if git cat-file -e "${{ github.event.before }}" 2>/dev/null; then + export COMMITLINT_FROM="${{ github.event.before }}" + else + # Fallback: check only HEAD commit on force push or first push + export COMMITLINT_FROM="HEAD~1" + fi + fi + npm run standards - name: Run cargo check run: cargo check --workspace --all-targets diff --git a/crates/sqlx-sqlite-conn-mgr/README.md b/crates/sqlx-sqlite-conn-mgr/README.md index 670a79b..d59f689 100644 --- a/crates/sqlx-sqlite-conn-mgr/README.md +++ b/crates/sqlx-sqlite-conn-mgr/README.md @@ -12,12 +12,12 @@ management. Prevents violation of access policies and/or a glut of open file handles and (mostly) idle threads * **Connection pooling**: - * Read-only pool for concurrent reads (up to 6 connections) + * Read-only pool for concurrent reads (default: 6 connections, configurable) * **Lazy write pool**: Single write connection pool (max_connections=1) initialized on first use * **Exclusive write access**: WriteGuard ensures serialized writes - * **WAL mode**: Automatically enabled on first `acquire_writer()` call - (idempotent) + * **WAL mode**: Automatically enabled on first `acquire_writer()` call (setting + journal mode to WAL is safe and idempotent) * See [WAL documentation](https://www.sqlite.org/wal.html) for details * **30-second idle timeout**: Both read and write connections close after 30 seconds of inactivity @@ -44,18 +44,19 @@ use std::sync::Arc; #[tokio::main] async fn main() -> Result<(), sqlx_sqlite_conn_mgr::Error> { // Connect to database (creates if missing, returns Arc) - let db = SqliteDatabase::connect("example.db").await?; + // (See below for how to customize the configuration) + let db = SqliteDatabase::connect("example.db", None).await?; // Multiple connects to the same path return the same instance - let db2 = SqliteDatabase::connect("example.db").await?; + let db2 = SqliteDatabase::connect("example.db", None).await?; assert!(Arc::ptr_eq(&db, &db2)); - // Use read_pool() for SELECT queries (supports concurrent reads) + // Use read_pool() for read queries (supports concurrent reads) let rows = query("SELECT * FROM users") .fetch_all(db.read_pool()?) .await?; - // Optionally acquire writer for INSERT/UPDATE/DELETE (exclusive access) + // Optionally acquire writer for write queries (exclusive access) // WAL mode is enabled automatically on first call let mut writer = db.acquire_writer().await?; query("INSERT INTO users (name) VALUES (?)") @@ -70,12 +71,39 @@ async fn main() -> Result<(), sqlx_sqlite_conn_mgr::Error> { } ``` +### Custom Configuration + +Only customize the configuration when the defaults don't meet your requirements: + +```rust +use sqlx_sqlite_conn_mgr::{SqliteDatabase, SqliteDatabaseConfig}; +use std::time::Duration; + +#[tokio::main] +async fn main() -> Result<(), sqlx_sqlite_conn_mgr::Error> { + // Only create custom configuration when defaults aren't suitable + let custom_config = SqliteDatabaseConfig { + max_read_connections: 10, + idle_timeout: Duration::from_secs(60), + }; + + // Pass custom configuration to connect() + let db = SqliteDatabase::connect("example.db", Some(custom_config)).await?; + + // Use the database as normal... + db.close().await?; + Ok(()) +} +``` + ## API Overview ### `SqliteDatabase` - * `connect(path)` - Connect to a database (creates if missing, returns cached - `Arc` if already open) + * `connect(path, custom_config)` - Connect to a database (creates if missing, + returns cached `Arc` if already open). Pass `None` for + `custom_config` to use defaults (recommended for most use cases), or + `Some(SqliteDatabaseConfig)` when you need to customize the configuration * `read_pool()` - Get reference to the read-only connection pool for read operations (returns `Result`) * `acquire_writer()` - Acquire exclusive write access (returns @@ -85,6 +113,15 @@ async fn main() -> Result<(), sqlx_sqlite_conn_mgr::Error> { * `close_and_remove()` - Close and delete all database files (.db, .db-wal, .db-shm) +### `SqliteDatabaseConfig` + +Configuration for connection pool behavior: + + * `max_read_connections: u32` - Maximum number of concurrent read connections + (default: 6) + * `idle_timeout: Duration` - How long idle connections remain open before + being closed (default: 30 seconds) + ### `WriteGuard` RAII guard that provides exclusive write access. Automatically returns the @@ -111,12 +148,11 @@ queries. is released via `WriteGuard` drop. 5. **Connection Management**: - * Read pool: 6 concurrent connections by default, 0 cached - * Can be configured via `SqliteDatabaseConfig` - * Write pool: max 1 connection, 0 cached - * Idle timeout: 30 seconds for both pools - * Can be configured via `SqliteDatabaseConfig` - * No perpetual caching to minimize idle thread overhead + * Read pool: 6 concurrent connections by default (configurable via `custom_config`) + * Write pool: max 1 connection + * Minimum connections: 0 (no perpetual caching) + * Idle timeout: 30 seconds by default (configurable via `custom_config`) + * Only customize `SqliteDatabaseConfig` when defaults don't meet your needs ## Error Handling diff --git a/crates/sqlx-sqlite-conn-mgr/src/error.rs b/crates/sqlx-sqlite-conn-mgr/src/error.rs index a6608ca..d3c02f5 100644 --- a/crates/sqlx-sqlite-conn-mgr/src/error.rs +++ b/crates/sqlx-sqlite-conn-mgr/src/error.rs @@ -18,6 +18,3 @@ pub enum Error { #[error("Database has been closed")] DatabaseClosed, } - -/// A type alias for Results with our Error type -pub type Result = std::result::Result; diff --git a/crates/sqlx-sqlite-conn-mgr/src/lib.rs b/crates/sqlx-sqlite-conn-mgr/src/lib.rs index b29c663..f48184a 100644 --- a/crates/sqlx-sqlite-conn-mgr/src/lib.rs +++ b/crates/sqlx-sqlite-conn-mgr/src/lib.rs @@ -12,22 +12,68 @@ //! //! ## Architecture //! -//! - **Dual pools**: Separate read-only pool (max 6 connections) and write pool (max 1 connection) +//! - **Connection pooling**: Separate read-only pool and write pool with a max of 1 connection //! - **Lazy WAL mode**: Write-Ahead Logging enabled automatically on first write //! - **Exclusive writes**: Single-connection write pool enforces serialized write access //! - **Concurrent reads**: Multiple readers can query simultaneously via the read pool - -// TODO: Remove these allows once implementation is complete -#![allow(dead_code)] +//! +//! ## Usage +//! +//! // TODO: Remove this ignore once implementation is complete +//! ```ignore +//! use sqlx_sqlite_conn_mgr::SqliteDatabase; +//! use std::sync::Arc; +//! +//! #[tokio::main] +//! async fn main() -> sqlx_sqlite_conn_mgr::Result<()> { +//! // Connect returns Arc +//! let db = SqliteDatabase::connect("example.db", None).await?; +//! +//! // Multiple connects to the same path return the same instance +//! let db2 = SqliteDatabase::connect("example.db", None).await?; +//! assert!(Arc::ptr_eq(&db, &db2)); +//! +//! // Use read_pool() for read queries (concurrent reads) +//! let rows = sqlx::query("SELECT * FROM users") +//! .fetch_all(db.read_pool()?) +//! .await?; +//! +//! // Optionally acquire writer for write queries (exclusive) +//! // WAL mode is enabled automatically on first call +//! let mut writer = db.acquire_writer().await?; +//! sqlx::query("INSERT INTO users (name) VALUES (?)") +//! .bind("Alice") +//! .execute(&mut *writer) +//! .await?; +//! +//! // Close when done +//! db.close().await?; +//! Ok(()) +//! } +//! ``` +//! +//! ## Design Principles +//! +//! - Uses sqlx's `SqlitePoolOptions` for all pool configuration +//! - Uses sqlx's `SqliteConnectOptions` for connection flags and configuration +//! - Minimal custom logic - delegates to sqlx wherever possible +//! - Global registry caches new database instances (with their pools) and returns existing ones +//! - WAL mode is enabled lazily only when writes are needed +//! +// TODO: Remove this allow once implementation is complete #![allow(unused)] mod config; mod database; mod error; +mod registry; mod write_guard; // Re-export public types pub use config::SqliteDatabaseConfig; pub use database::SqliteDatabase; -pub use error::{Error, Result}; +pub use error::Error; pub use write_guard::WriteGuard; + +/// A type alias for Results with our custom Error type +pub type Result = std::result::Result; diff --git a/crates/sqlx-sqlite-conn-mgr/src/registry.rs b/crates/sqlx-sqlite-conn-mgr/src/registry.rs new file mode 100644 index 0000000..dd1e66f --- /dev/null +++ b/crates/sqlx-sqlite-conn-mgr/src/registry.rs @@ -0,0 +1,170 @@ +//! Global database registry to cache new database instances and return existing ones + +use crate::Result; +use crate::database::SqliteDatabase; +use std::collections::HashMap; +use std::future::Future; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, OnceLock, Weak}; +use tokio::sync::RwLock; + +/// Global registry for SQLite databases +static DATABASE_REGISTRY: OnceLock>>> = + OnceLock::new(); + +fn registry() -> &'static RwLock>> { + DATABASE_REGISTRY.get_or_init(|| RwLock::new(HashMap::new())) +} + +/// Check if a path represents an in-memory SQLite database +/// +/// Returns true for `:memory:` and `file::memory:*` URIs +pub fn is_memory_database(path: &Path) -> bool { + let path_str = path.to_str().unwrap_or(""); + path_str == ":memory:" + || path_str.starts_with("file::memory:") + || path_str.contains("mode=memory") +} + +/// Get or open a SQLite database connection +/// +/// If a database is already connected, returns the cached instance. +/// Otherwise, calls the provided factory function to create a new connection. +/// +/// Special case: `:memory:` databases should not be cached (each is unique) +pub async fn get_or_open_database(path: &Path, factory: F) -> Result> +where + F: FnOnce() -> Fut, + Fut: Future>, +{ + // Skip registry for in-memory databases - always create new + if is_memory_database(path) { + let db = factory().await?; + return Ok(Arc::new(db)); + } + + // Canonicalize the path for consistent lookups + let canonical_path = canonicalize_path(path)?; + + // Try to get existing database with read lock (allows concurrent reads) + { + let registry = registry().read().await; + + if let Some(weak) = registry.get(&canonical_path) { + if let Some(db) = weak.upgrade() { + return Ok(db); + } + // Weak reference exists but dead - will be cleaned up in write phase + } + } + + // Phase 2: Database not found, acquire write lock + let mut registry = registry().write().await; + + // Double-check: another thread might have created it while we waited for write lock + if let Some(weak) = registry.get(&canonical_path) { + if let Some(db) = weak.upgrade() { + return Ok(db); + } + } + + // Clean up dead weak references while we have the write lock + registry.retain(|_, weak| weak.strong_count() > 0); + + // Now we're sure the database doesn't exist - create it while holding the lock + // This prevents race conditions + let db = factory().await?; + let arc_db = Arc::new(db); + + // Cache the new database + registry.insert(canonical_path, Arc::downgrade(&arc_db)); + + Ok(arc_db) +} + +/// Helper to canonicalize a database path +/// +/// This function attempts to resolve paths to their canonical form to ensure +/// consistent cache lookups. It handles: +/// - Absolute path resolution +/// - Symlink resolution (when file exists) +/// - Parent directory canonicalization (when file doesn't exist yet) +/// +/// Known limitations when file doesn't exist: +/// - Case sensitivity: On case-insensitive filesystems (macOS, Windows), paths +/// differing only in case will be treated as different until the file is created. +/// This could lead to multiple connection pools for the same logical database, at +/// least until the file is created and can be canonicalized properly. +/// - Symlinks in filename: If the filename itself will be a symlink (rare for SQLite), +/// different symlink names won't be resolved until the file exists. +fn canonicalize_path(path: &Path) -> std::io::Result { + match path.canonicalize() { + Ok(p) => Ok(p), + Err(_) => { + // If path doesn't exist, try to canonicalize parent + filename + let parent = path.parent().unwrap_or_else(|| Path::new(".")); + let filename = path + .file_name() + .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::InvalidInput, "Invalid path"))?; + let canonical_parent = parent.canonicalize()?; + + // Note: We preserve the filename case as provided. On case-insensitive + // filesystems, this means "MyDB.db" and "mydb.db" will create separate + // cache entries until the file exists and can be canonicalized properly. + // This is a known limitation but acceptable since: + // 1. Most apps use consistent casing + // 2. After first connection creates the file, subsequent connects will + // use the canonical (on-disk) case + Ok(canonical_parent.join(filename)) + } + } +} + +/// Remove a database from the cache +/// +/// Special case: `:memory:` databases are never in the registry +/// +/// Returns an error if the path cannot be canonicalized +pub async fn uncache_database(path: &Path) -> std::io::Result<()> { + // Skip registry for in-memory databases + if is_memory_database(path) { + return Ok(()); + } + + // Canonicalize path + let canonical_path = canonicalize_path(path)?; + + let mut registry = registry().write().await; + registry.remove(&canonical_path); + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_canonicalize_path() { + let temp_dir = std::env::temp_dir(); + let test_path = temp_dir.join("test.db"); + + // Test that path is canonicalized to absolute path + let canonical = canonicalize_path(&test_path).unwrap(); + assert!(canonical.is_absolute()); + + // Test relative path + let relative_path = Path::new("./test_relative.db"); + let canonical_relative = canonicalize_path(relative_path).unwrap(); + assert!(canonical_relative.is_absolute()); + } + + #[test] + fn test_canonicalize_nonexistent_path() { + let temp_dir = std::env::temp_dir(); + let nonexistent = temp_dir.join("nonexistent_dir").join("test.db"); + + // Should fail if parent directory doesn't exist + let result = canonicalize_path(&nonexistent); + assert!(result.is_err()); + } +} diff --git a/crates/sqlx-sqlite-conn-mgr/src/write_guard.rs b/crates/sqlx-sqlite-conn-mgr/src/write_guard.rs index 4ac1f66..d5f7d81 100644 --- a/crates/sqlx-sqlite-conn-mgr/src/write_guard.rs +++ b/crates/sqlx-sqlite-conn-mgr/src/write_guard.rs @@ -20,7 +20,7 @@ use std::ops::{Deref, DerefMut}; /// use sqlx::query; /// /// # async fn example() -> Result<(), sqlx_sqlite_conn_mgr::Error> { -/// let db = SqliteDatabase::connect("test.db").await?; +/// let db = SqliteDatabase::connect("test.db", None).await?; /// let mut writer = db.acquire_writer().await?; /// // Use &mut *writer for write queries (e.g. INSERT/UPDATE/DELETE) /// query("INSERT INTO users (name) VALUES (?)")