diff --git a/io-uring-test/src/main.rs b/io-uring-test/src/main.rs
index afabcb24..042db589 100644
--- a/io-uring-test/src/main.rs
+++ b/io-uring-test/src/main.rs
@@ -83,6 +83,7 @@ fn test(
     tests::register::test_register_files_sparse(&mut ring, &test)?;
     tests::register_buffers::test_register_buffers(&mut ring, &test)?;
     tests::register_buffers::test_register_buffers_update(&mut ring, &test)?;
+    tests::register_buffers::test_register_buffers_clone(&test)?;
     tests::register_buf_ring::test_register_buf_ring(&mut ring, &test)?;
     tests::register_sync_cancel::test_register_sync_cancel(&mut ring, &test)?;
     tests::register_sync_cancel::test_register_sync_cancel_unsubmitted(&mut ring, &test)?;
diff --git a/io-uring-test/src/tests/register_buffers.rs b/io-uring-test/src/tests/register_buffers.rs
index feeb3a8b..2896072d 100644
--- a/io-uring-test/src/tests/register_buffers.rs
+++ b/io-uring-test/src/tests/register_buffers.rs
@@ -349,3 +349,85 @@ fn check_only_timeout(
 
     Err(anyhow::anyhow!("unexpected completion queue entry"))
 }
+
+// This test manages its own source and destination rings, so unlike its siblings
+// it does not take the harness's shared ring.
+pub fn test_register_buffers_clone(test: &Test) -> anyhow::Result<()> {
+    require!(
+        test;
+        test.probe.is_supported(WriteFixed::CODE);
+        test.probe.is_supported(ReadFixed::CODE);
+    );
+
+    println!("test register_buffers_clone");
+
+    const BUF_SIZE: usize = 1 << 12; // Page size
+
+    // Use dedicated source and destination rings: cloning requires the destination
+    // buffer table to be empty, so we can't reuse the harness's shared `ring`.
+    let src_ring = IoUring::new(8)?;
+    let mut dst_ring = IoUring::new(8)?;
+
+    // The source ring owns the physical buffer registration.
+    let mut buf = vec![b'Z'; BUF_SIZE];
+    let iovecs = [libc::iovec {
+        iov_base: buf.as_mut_ptr().cast(),
+        iov_len: buf.len(),
+    }];
+    // Safety: `buf` outlives the registration; it is unregistered before returning.
+    unsafe { src_ring.submitter().register_buffers(&iovecs)? };
+
+    // Clone the source table into the destination, sharing the pages.
+    if let Err(e) = dst_ring
+        .submitter()
+        .register_buffers_clone(src_ring.as_raw_fd())
+    {
+        // IORING_REGISTER_CLONE_BUFFERS requires Linux 6.12+.
+        if matches!(e.raw_os_error(), Some(libc::EINVAL | libc::ENOTSUP)) {
+            println!("skipping register_buffers_clone: not supported by this kernel");
+            src_ring.submitter().unregister_buffers()?;
+            return Ok(());
+        }
+        return Err(e.into());
+    }
+
+    // The destination ring can now drive fixed I/O against the cloned buffer.
+    let file = tempfile::tempfile()?;
+    let fd = Fd(file.as_raw_fd());
+
+    let write = WriteFixed::new(fd, buf.as_ptr(), BUF_SIZE as u32, 0)
+        .build()
+        .user_data(1);
+    // Safety: buffer index 0 is registered (cloned) and `buf` is valid here.
+    unsafe { dst_ring.submission().push(&write)? };
+    assert_eq!(dst_ring.submit_and_wait(1)?, 1);
+    let cqe = dst_ring.completion().next().unwrap();
+    assert_eq!(
+        cqe.result(),
+        BUF_SIZE as i32,
+        "WriteFixed via cloned buffer failed"
+    );
+
+    buf.fill(0);
+    let read = ReadFixed::new(fd, buf.as_mut_ptr(), BUF_SIZE as u32, 0)
+        .build()
+        .user_data(2);
+    // Safety: buffer index 0 is registered (cloned) and `buf` is valid here.
+    unsafe { dst_ring.submission().push(&read)? };
+    assert_eq!(dst_ring.submit_and_wait(1)?, 1);
+    let cqe = dst_ring.completion().next().unwrap();
+    assert_eq!(
+        cqe.result(),
+        BUF_SIZE as i32,
+        "ReadFixed via cloned buffer failed"
+    );
+    assert!(
+        buf.iter().all(|&x| x == b'Z'),
+        "data round-tripped incorrectly"
+    );
+
+    dst_ring.submitter().unregister_buffers()?;
+    src_ring.submitter().unregister_buffers()?;
+
+    Ok(())
+}
diff --git a/src/submit.rs b/src/submit.rs
index cef91302..3062e87a 100644
--- a/src/submit.rs
+++ b/src/submit.rs
@@ -4,7 +4,7 @@ use std::{io, mem, ptr};
 
 use crate::register::{execute, Probe};
 use crate::sys;
-use crate::types::{CancelBuilder, Timespec};
+use crate::types::{CancelBuilder, CloneBuffersFlags, Timespec};
 use crate::util::{cast_ptr, OwnedFd};
 use crate::Parameters;
 use bitflags::bitflags;
@@ -349,6 +349,57 @@ impl<'a> Submitter<'a> {
         .map(drop)
     }
 
+    /// Clone the entire registered buffer table from another ring into this one.
+    ///
+    /// `src_fd` is the raw file descriptor of the source `io_uring`. The source's
+    /// buffers are shared with this ring rather than copied, so a single physical
+    /// registration can back many rings without re-pinning the pages in the kernel.
+    ///
+    /// This ring's buffer table must be empty. To clone into a non-empty table or
+    /// to copy a sub-range, use
+    /// [`register_buffers_clone_offset`](Self::register_buffers_clone_offset).
+    ///
+    /// Available since Linux 6.12.
+    pub fn register_buffers_clone(&self, src_fd: RawFd) -> io::Result<()> {
+        self.register_buffers_clone_offset(src_fd, 0, 0, 0, CloneBuffersFlags::empty())
+    }
+
+    /// Clone a range of the registered buffer table from another ring into this one.
+    ///
+    /// `src_fd` is the raw file descriptor of the source `io_uring`. `nr` buffers
+    /// starting at `src_off` in the source table are installed starting at `dst_off`
+    /// in this ring's table. A `nr` of `0` clones the source's entire table.
+    ///
+    /// See [`CloneBuffersFlags`] for replacing an existing destination range or
+    /// treating `src_fd` as a registered ring descriptor.
+    ///
+    /// Available since Linux 6.12.
+    pub fn register_buffers_clone_offset(
+        &self,
+        src_fd: RawFd,
+        src_off: u32,
+        dst_off: u32,
+        nr: u32,
+        flags: CloneBuffersFlags,
+    ) -> io::Result<()> {
+        let arg = sys::io_uring_clone_buffers {
+            src_fd: src_fd as _,
+            flags: flags.bits(),
+            src_off,
+            dst_off,
+            nr,
+            ..Default::default()
+        };
+        execute(
+            self.fd.as_raw_fd(),
+            sys::IORING_REGISTER_CLONE_BUFFERS,
+            cast_ptr::<sys::io_uring_clone_buffers>(&arg).cast(),
+            // This opcode takes a single struct; the kernel requires nr_args == 1.
+            1,
+        )
+        .map(drop)
+    }
+
     /// Registers an empty file table of nr_files number of file descriptors. The sparse variant is
     /// available in kernels 5.19 and later.
     ///
diff --git a/src/types.rs b/src/types.rs
index ef8d66e8..1eab8cc0 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -155,6 +155,23 @@ bitflags! {
     }
 }
+
+bitflags! {
+    /// Options for
+    /// [`Submitter::register_buffers_clone`](super::Submitter::register_buffers_clone) and
+    /// [`Submitter::register_buffers_clone_offset`](super::Submitter::register_buffers_clone_offset).
+    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+    pub struct CloneBuffersFlags: u32 {
+        /// Interpret `src_fd` as a registered ring descriptor rather than a
+        /// raw file descriptor.
+        const SRC_REGISTERED = sys::IORING_REGISTER_SRC_REGISTERED;
+
+        /// Allow cloning into a destination range that already has buffers
+        /// registered, replacing them. Without this flag the destination
+        /// range must be empty.
+        const DST_REPLACE = sys::IORING_REGISTER_DST_REPLACE;
+    }
+}
 
 /// Wrapper around `open_how` as used in [the `openat2(2)` system
 /// call](https://man7.org/linux/man-pages/man2/openat2.2.html).
 #[derive(Default, Debug, Clone, Copy)]