Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions io-uring-test/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ fn test<S: squeue::EntryMarker, C: cqueue::EntryMarker>(
tests::register::test_register_files_sparse(&mut ring, &test)?;
tests::register_buffers::test_register_buffers(&mut ring, &test)?;
tests::register_buffers::test_register_buffers_update(&mut ring, &test)?;
tests::register_buffers::test_register_buffers_clone(&test)?;
tests::register_buf_ring::test_register_buf_ring(&mut ring, &test)?;
tests::register_sync_cancel::test_register_sync_cancel(&mut ring, &test)?;
tests::register_sync_cancel::test_register_sync_cancel_unsubmitted(&mut ring, &test)?;
Expand Down
82 changes: 82 additions & 0 deletions io-uring-test/src/tests/register_buffers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -349,3 +349,85 @@ fn check_only_timeout<S: squeue::EntryMarker, C: cqueue::EntryMarker>(

Err(anyhow::anyhow!("unexpected completion queue entry"))
}

// Exercises buffer-table cloning between two rings that the test creates itself.
// The harness's shared ring is deliberately not a parameter here: a plain clone
// needs an empty destination buffer table, so fresh rings are used instead.
pub fn test_register_buffers_clone(test: &Test) -> anyhow::Result<()> {
    require!(
        test;
        test.probe.is_supported(WriteFixed::CODE);
        test.probe.is_supported(ReadFixed::CODE);
    );

    println!("test register_buffers_clone");

    const BUF_SIZE: usize = 1 << 12; // 4 KiB
    const FILL: u8 = b'Z';

    // `source` performs the real registration; `dest` only receives the clone.
    let source = IoUring::new(8)?;
    let mut dest = IoUring::new(8)?;

    let mut payload = vec![FILL; BUF_SIZE];
    let regions = [libc::iovec {
        iov_base: payload.as_mut_ptr().cast(),
        iov_len: payload.len(),
    }];
    // SAFETY: `payload` stays alive for the whole registration and the buffers are
    // unregistered before this function returns successfully.
    unsafe { source.submitter().register_buffers(&regions)? };

    // Hand the source's buffer table over to the destination ring.
    let clone_outcome = dest.submitter().register_buffers_clone(source.as_raw_fd());
    match clone_outcome {
        Ok(()) => {}
        // IORING_REGISTER_CLONE_BUFFERS requires Linux 6.12+; treat a rejection by
        // the kernel as "feature missing" and skip instead of failing.
        Err(err) if matches!(err.raw_os_error(), Some(libc::EINVAL | libc::ENOTSUP)) => {
            println!("skipping register_buffers_clone: not supported by this kernel");
            source.submitter().unregister_buffers()?;
            return Ok(());
        }
        Err(err) => return Err(err.into()),
    }

    // From here on only `dest` submits I/O, all of it against cloned buffer index 0.
    let scratch = tempfile::tempfile()?;
    let target = Fd(scratch.as_raw_fd());
    let io_len = BUF_SIZE as u32;
    let expected = BUF_SIZE as i32;

    // Step 1: write the fill pattern out to the scratch file.
    let write_sqe = WriteFixed::new(target, payload.as_ptr(), io_len, 0)
        .build()
        .user_data(1);
    // SAFETY: buffer index 0 exists on `dest` via the clone and `payload` is valid.
    unsafe { dest.submission().push(&write_sqe)? };
    let submitted = dest.submit_and_wait(1)?;
    assert_eq!(submitted, 1);
    let write_cqe = dest.completion().next().unwrap();
    assert_eq!(
        write_cqe.result(),
        expected,
        "WriteFixed via cloned buffer failed"
    );

    // Step 2: wipe the buffer, then read the file back into it.
    payload.fill(0);
    let read_sqe = ReadFixed::new(target, payload.as_mut_ptr(), io_len, 0)
        .build()
        .user_data(2);
    // SAFETY: buffer index 0 exists on `dest` via the clone and `payload` is valid.
    unsafe { dest.submission().push(&read_sqe)? };
    let submitted = dest.submit_and_wait(1)?;
    assert_eq!(submitted, 1);
    let read_cqe = dest.completion().next().unwrap();
    assert_eq!(
        read_cqe.result(),
        expected,
        "ReadFixed via cloned buffer failed"
    );

    // Step 3: the wiped buffer must hold the original pattern again.
    let intact = payload.iter().all(|&byte| byte == FILL);
    assert!(intact, "data round-tripped incorrectly");

    // Tear down in the same order as before: clone first, then the source.
    dest.submitter().unregister_buffers()?;
    source.submitter().unregister_buffers()?;

    Ok(())
}
53 changes: 52 additions & 1 deletion src/submit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::{io, mem, ptr};

use crate::register::{execute, Probe};
use crate::sys;
use crate::types::{CancelBuilder, Timespec};
use crate::types::{CancelBuilder, CloneBuffersFlags, Timespec};
use crate::util::{cast_ptr, OwnedFd};
use crate::Parameters;
use bitflags::bitflags;
Expand Down Expand Up @@ -349,6 +349,57 @@ impl<'a> Submitter<'a> {
.map(drop)
}

/// Share every buffer registered on another ring with this ring.
///
/// The source `io_uring` is identified by `src_fd`, its raw file descriptor.
/// Buffers are shared rather than copied, so one physical registration can back
/// many rings without the kernel pinning the pages again.
///
/// The buffer table of this ring must be empty. For cloning into a table that
/// already holds buffers, or for cloning only part of the source table, see
/// [`register_buffers_clone_offset`](Self::register_buffers_clone_offset).
///
/// Available since Linux 6.12.
pub fn register_buffers_clone(&self, src_fd: RawFd) -> io::Result<()> {
    // Zero offsets, a zero count and no flags request a whole-table clone.
    let (src_off, dst_off, nr) = (0, 0, 0);
    let flags = CloneBuffersFlags::empty();
    self.register_buffers_clone_offset(src_fd, src_off, dst_off, nr, flags)
}

/// Share a range of another ring's registered buffers with this ring.
///
/// The source `io_uring` is identified by `src_fd`, its raw file descriptor.
/// Starting at index `src_off` of the source table, `nr` buffers are installed
/// into this ring's table beginning at index `dst_off`. Passing `0` for `nr`
/// clones the whole source table.
///
/// [`CloneBuffersFlags`] controls whether an occupied destination range may be
/// replaced and whether `src_fd` names a registered ring descriptor.
///
/// Available since Linux 6.12.
pub fn register_buffers_clone_offset(
    &self,
    src_fd: RawFd,
    src_off: u32,
    dst_off: u32,
    nr: u32,
    flags: CloneBuffersFlags,
) -> io::Result<()> {
    // NOTE(review): the base opcode is documented for 6.12, but non-zero offsets,
    // a non-zero `nr` and `DST_REPLACE` may need a newer kernel (6.13, going by
    // liburing) — confirm and reflect it in the docs above.
    let ring_fd = self.fd.as_raw_fd();
    let raw_flags = flags.bits();

    let request = sys::io_uring_clone_buffers {
        // Cast to whatever integer type the kernel struct uses for the descriptor.
        src_fd: src_fd as _,
        flags: raw_flags,
        src_off,
        dst_off,
        nr,
        // Every remaining field keeps its default value.
        ..Default::default()
    };
    let request_ptr = cast_ptr::<sys::io_uring_clone_buffers>(&request);

    // This opcode takes a single struct; the kernel requires nr_args == 1.
    execute(ring_fd, sys::IORING_REGISTER_CLONE_BUFFERS, request_ptr.cast(), 1)?;
    Ok(())
}

/// Registers an empty file table of nr_files number of file descriptors. The sparse variant is
/// available in kernels 5.19 and later.
///
Expand Down
17 changes: 17 additions & 0 deletions src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,23 @@ bitflags! {
}
}

bitflags! {
/// Flags that adjust how a registered buffer table is cloned between rings.
///
/// Accepted by
/// [`Submitter::register_buffers_clone_offset`](super::Submitter::register_buffers_clone_offset);
/// [`Submitter::register_buffers_clone`](super::Submitter::register_buffers_clone)
/// always passes the empty set.
///
/// Each flag takes its bit value from the matching `sys::IORING_REGISTER_*` constant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct CloneBuffersFlags: u32 {
/// Interpret `src_fd` as a registered ring descriptor rather than a
/// raw file descriptor.
const SRC_REGISTERED = sys::IORING_REGISTER_SRC_REGISTERED;

/// Allow cloning into a destination range that already has buffers
/// registered, replacing them. Without this flag the destination
/// range must be empty.
const DST_REPLACE = sys::IORING_REGISTER_DST_REPLACE;
}
}

/// Wrapper around `open_how` as used in [the `openat2(2)` system
/// call](https://man7.org/linux/man-pages/man2/openat2.2.html).
#[derive(Default, Debug, Clone, Copy)]
Expand Down
Loading