Skip to content

Commit f72185e

Browse files
wkalt and claude committed
fix: don't mask non-NotFound errors as DatasetNotFound when opening a dataset
resolve_latest_location can fail for many reasons (I/O errors, permission errors, network failures), but the error was unconditionally wrapped as DatasetNotFound. Now only NotFound errors are converted to DatasetNotFound; all other errors propagate with their original type.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent b80fbb3 commit f72185e

File tree

2 files changed

+60
-1
lines changed

2 files changed

+60
-1
lines changed

rust/lance/src/dataset/builder.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -717,7 +717,12 @@ impl DatasetBuilder {
717717
None => commit_handler
718718
.resolve_latest_location(&base_path, &object_store)
719719
.await
720-
.map_err(|e| Error::dataset_not_found(base_path.to_string(), Box::new(e)))?,
720+
.map_err(|e| match &e {
721+
Error::NotFound { .. } => {
722+
Error::dataset_not_found(base_path.to_string(), Box::new(e))
723+
}
724+
_ => e,
725+
})?,
721726
};
722727
let manifest = Dataset::load_manifest(
723728
&object_store,

rust/lance/src/dataset/tests/dataset_io.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1616,3 +1616,57 @@ async fn test_dataset_uri_roundtrips() {
16161616
dataset.latest_version_id().await.unwrap()
16171617
);
16181618
}
1619+
1620+
/// Test double for a commit handler: its `resolve_latest_location` fails with
/// an I/O error on every call.
///
/// Opening a dataset through it lets a test check that an error other than
/// NotFound reaches the caller unchanged instead of being rewrapped as
/// DatasetNotFound.
#[derive(Debug)]
struct ErroringCommitHandler;
1625+
1626+
#[async_trait::async_trait]
1627+
impl lance_table::io::commit::CommitHandler for ErroringCommitHandler {
1628+
async fn resolve_latest_location(
1629+
&self,
1630+
_base_path: &Path,
1631+
_object_store: &ObjectStore,
1632+
) -> Result<lance_table::io::commit::ManifestLocation> {
1633+
Err(Error::io("simulated I/O error".to_string()))
1634+
}
1635+
1636+
async fn commit(
1637+
&self,
1638+
_manifest: &mut lance_table::format::Manifest,
1639+
_indices: Option<Vec<lance_table::format::IndexMetadata>>,
1640+
_base_path: &Path,
1641+
_object_store: &ObjectStore,
1642+
_manifest_writer: lance_table::io::commit::ManifestWriter,
1643+
_naming_scheme: lance_table::io::commit::ManifestNamingScheme,
1644+
_transaction: Option<lance_table::format::Transaction>,
1645+
) -> std::result::Result<
1646+
lance_table::io::commit::ManifestLocation,
1647+
lance_table::io::commit::CommitError,
1648+
> {
1649+
unimplemented!()
1650+
}
1651+
}
1652+
1653+
#[tokio::test]
1654+
async fn test_open_dataset_non_not_found_error_is_not_masked() {
1655+
// When resolve_latest_location returns an IO error, it should propagate
1656+
// as an IO error, not be wrapped as DatasetNotFound.
1657+
let store = Arc::new(object_store::memory::InMemory::new());
1658+
let location = url::Url::parse("memory://test").unwrap();
1659+
1660+
#[allow(deprecated)]
1661+
let result = DatasetBuilder::from_uri("memory://test")
1662+
.with_object_store(store, location, Arc::new(ErroringCommitHandler))
1663+
.load()
1664+
.await;
1665+
1666+
let err = result.unwrap_err();
1667+
assert!(
1668+
matches!(err, Error::IO { .. }),
1669+
"Expected IO error but got: {:?}",
1670+
err,
1671+
);
1672+
}

0 commit comments

Comments
 (0)