Skip to content

Commit df7346e

Browse files
committed
Revert "CP tokio_epoll_uring for read path"
This reverts commit 82d9c68.
1 parent 76efb1b commit df7346e

File tree

5 files changed

+46
-128
lines changed

5 files changed

+46
-128
lines changed

Cargo.lock

Lines changed: 10 additions & 68 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pageserver/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ enum-map.workspace = true
8282
enumset.workspace = true
8383
strum.workspace = true
8484
strum_macros.workspace = true
85-
tokio-epoll-uring = { path = "/home/admin/tokio-epoll-uring/tokio-epoll-uring" }
8685

8786
[dev-dependencies]
8887
criterion.workspace = true

pageserver/src/tenant/block_io.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
use super::ephemeral_file::EphemeralFile;
66
use super::storage_layer::delta_layer::{Adapter, DeltaLayerInner};
77
use crate::context::RequestContext;
8-
use crate::page_cache::{self, PageReadGuard, ReadBufResult, PAGE_SZ, PageWriteGuard};
8+
use crate::page_cache::{self, PageReadGuard, ReadBufResult, PAGE_SZ};
99
use crate::virtual_file::VirtualFile;
1010
use bytes::Bytes;
1111
use std::ops::{Deref, DerefMut};
@@ -169,7 +169,7 @@ impl FileBlockReader {
169169
}
170170

171171
/// Read a page from the underlying file into given buffer.
172-
async fn fill_buffer(&self, buf: PageWriteGuard<'static>, blkno: u32) -> Result<PageWriteGuard<'static>, std::io::Error> {
172+
async fn fill_buffer(&self, buf: &mut [u8], blkno: u32) -> Result<(), std::io::Error> {
173173
assert!(buf.len() == PAGE_SZ);
174174
self.file
175175
.read_exact_at(buf, blkno as u64 * PAGE_SZ as u64)
@@ -198,7 +198,7 @@ impl FileBlockReader {
198198
ReadBufResult::Found(guard) => Ok(guard.into()),
199199
ReadBufResult::NotFound(mut write_guard) => {
200200
// Read the page from disk into the buffer
201-
let write_guard = self.fill_buffer(write_guard, blknum).await?;
201+
self.fill_buffer(write_guard.deref_mut(), blknum).await?;
202202
Ok(write_guard.mark_valid().into())
203203
}
204204
}

pageserver/src/tenant/ephemeral_file.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,10 @@ impl EphemeralFile {
8989
return Ok(BlockLease::PageReadGuard(guard))
9090
}
9191
page_cache::ReadBufResult::NotFound(mut write_guard) => {
92-
let write_guard = self.file
93-
.read_exact_at(write_guard, blknum as u64 * PAGE_SZ as u64)
92+
let buf: &mut [u8] = write_guard.deref_mut();
93+
debug_assert_eq!(buf.len(), PAGE_SZ);
94+
self.file
95+
.read_exact_at(&mut buf[..], blknum as u64 * PAGE_SZ as u64)
9496
.await?;
9597
let read_guard = write_guard.mark_valid();
9698
return Ok(BlockLease::PageReadGuard(read_guard));

pageserver/src/virtual_file.rs

Lines changed: 29 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,11 @@
1111
//! src/backend/storage/file/fd.c
1212
//!
1313
use crate::metrics::{StorageIoOperation, STORAGE_IO_SIZE, STORAGE_IO_TIME_METRIC};
14-
use crate::page_cache::PageWriteGuard;
1514
use crate::tenant::TENANTS_SEGMENT_NAME;
1615
use camino::{Utf8Path, Utf8PathBuf};
1716
use once_cell::sync::OnceCell;
1817
use std::fs::{self, File, OpenOptions};
1918
use std::io::{Error, ErrorKind, Seek, SeekFrom};
20-
use std::os::fd::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd};
2119
use std::os::unix::fs::FileExt;
2220
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
2321
use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
@@ -372,7 +370,7 @@ impl VirtualFile {
372370
///
373371
/// We are doing it via a macro as Rust doesn't support async closures that
374372
/// take on parameters with lifetimes.
375-
async fn lock_file(&self) -> Result<FileGuard<'static>, Error> {
373+
async fn lock_file(&self) -> Result<FileGuard<'_>, Error> {
376374
let open_files = get_open_files();
377375

378376
let mut handle_guard = {
@@ -462,59 +460,24 @@ impl VirtualFile {
462460
}
463461

464462
// Copied from https://doc.rust-lang.org/1.72.0/src/std/os/unix/fs.rs.html#117-135
465-
pub async fn read_exact_at(
466-
&self,
467-
mut write_guard: PageWriteGuard<'static>,
468-
mut offset: u64,
469-
) -> Result<PageWriteGuard<'static>, Error> {
470-
let file_guard: FileGuard<'static> = self.lock_file().await?;
471-
472-
let system = tokio_epoll_uring::thread_local_system().await;
473-
struct PageWriteGuardBuf {
474-
buf: PageWriteGuard<'static>,
475-
init_up_to: usize,
476-
}
477-
unsafe impl tokio_epoll_uring::IoBuf for PageWriteGuardBuf {
478-
fn stable_ptr(&self) -> *const u8 {
479-
self.buf.as_ptr()
480-
}
481-
fn bytes_init(&self) -> usize {
482-
self.init_up_to
483-
}
484-
fn bytes_total(&self) -> usize {
485-
self.buf.len()
486-
}
487-
}
488-
unsafe impl tokio_epoll_uring::IoBufMut for PageWriteGuardBuf {
489-
fn stable_mut_ptr(&mut self) -> *mut u8 {
490-
self.buf.as_mut_ptr()
491-
}
492-
493-
unsafe fn set_init(&mut self, pos: usize) {
494-
assert!(pos <= self.buf.len());
495-
self.init_up_to = pos;
463+
pub async fn read_exact_at(&self, mut buf: &mut [u8], mut offset: u64) -> Result<(), Error> {
464+
while !buf.is_empty() {
465+
match self.read_at(buf, offset).await {
466+
Ok(0) => {
467+
return Err(Error::new(
468+
std::io::ErrorKind::UnexpectedEof,
469+
"failed to fill whole buffer",
470+
))
471+
}
472+
Ok(n) => {
473+
buf = &mut buf[n..];
474+
offset += n as u64;
475+
}
476+
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => {}
477+
Err(e) => return Err(e),
496478
}
497479
}
498-
let buf = PageWriteGuardBuf {
499-
buf: write_guard,
500-
init_up_to: 0,
501-
};
502-
let owned_fd = unsafe { OwnedFd::from_raw_fd(file_guard.as_ref().as_raw_fd()) };
503-
let guard = scopeguard::guard(file_guard, |_| {
504-
panic!("must not drop future while operation is ongoing (todo: pass file_guard to tokio_epoll_uring to aovid this)")
505-
});
506-
let ((owned_fd, buf), res) = system.read(owned_fd, offset, buf).await;
507-
let _ = OwnedFd::into_raw_fd(owned_fd);
508-
let _ = scopeguard::ScopeGuard::into_inner(guard);
509-
let PageWriteGuardBuf {
510-
buf: write_guard,
511-
init_up_to,
512-
} = buf;
513-
if let Ok(num_read) = res {
514-
assert!(init_up_to == num_read); // TODO need to deal with short reads here
515-
}
516-
res.map(|_| write_guard)
517-
.map_err(|e| Error::new(ErrorKind::Other, e))
480+
Ok(())
518481
}
519482

520483
// Copied from https://doc.rust-lang.org/1.72.0/src/std/os/unix/fs.rs.html#219-235
@@ -564,6 +527,18 @@ impl VirtualFile {
564527
Ok(n)
565528
}
566529

530+
pub async fn read_at(&self, buf: &mut [u8], offset: u64) -> Result<usize, Error> {
531+
let result = with_file!(self, StorageIoOperation::Read, |file| file
532+
.as_ref()
533+
.read_at(buf, offset));
534+
if let Ok(size) = result {
535+
STORAGE_IO_SIZE
536+
.with_label_values(&["read", &self.tenant_id, &self.timeline_id])
537+
.add(size as i64);
538+
}
539+
result
540+
}
541+
567542
async fn write_at(&self, buf: &[u8], offset: u64) -> Result<usize, Error> {
568543
let result = with_file!(self, StorageIoOperation::Write, |file| file
569544
.as_ref()

0 commit comments

Comments
 (0)