77> Please see the [prompt](#prompt) if you want help generating an extension.
88
99This document is a guide for authoring Daft native extensions in Rust.
10- Daft supports native Rust extensions by leveraging a stable C ABI and Arrow FFI. Today we support authoring native
10+ Daft supports native Rust extensions by leveraging a stable C ABI based on the
11+ [Arrow C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html).
12+ Extensions are **not coupled** to any particular Arrow library version. The ABI boundary uses
13+ plain C structs (`ArrowSchema`, `ArrowArray`), so your extension can use any arrow-rs version
14+ (or even a different Arrow implementation entirely). Today we support authoring native
1115scalar functions, but are actively working on additional native extension features.
1216
1317## Example
@@ -93,16 +97,18 @@ crate-type = ["cdylib"]
9397
9498[dependencies ]
9599daft-ext = <version>
96- arrow-array = " 57.1.0 "
97- arrow-schema = " 57.1.0 "
100+ daft-ext-abi = { version = <version>, features = [ " arrow-58 " ] }
101+ arrow = { version = " 58 " , features = [ " ffi " ] }
98102```
99103
100- !!! tip "Arrow types "
104+ !!! tip "Arrow version freedom"
101105
102- Use `arrow-array` builders and downcasting directly for working with data.
103- The `daft-ext` prelude re-exports common types like `ArrayRef` and `Field`.
104- Import `arrow_array::Array` for the `len()` and `is_null()` methods, and
105- `arrow_array::cast::AsArray` for downcasting (e.g., `as_string`).
106+ The `daft-ext` ABI uses C Data Interface types — your extension is **not** pinned to
107+ Daft's arrow-rs version. Enable a feature flag on `daft-ext-abi` matching your arrow-rs
108+ version (`arrow-56`, `arrow-57`, or `arrow-58`) to get safe `.into()` conversions
109+ between arrow-rs FFI types and the ABI types. For unsupported versions, use the
110+ `from_owned`/`into_owned`/`from_raw`/`as_raw` escape hatches on `ArrowArray`
111+ and `ArrowSchema`.
106112
107113Then update the pyproject to use `setuptools-rust` as the build system.
108114
@@ -164,14 +170,15 @@ cat src/lib.rs
164170```
165171
166172``` rust
167- use std :: ffi :: CStr ;
168- use std :: sync :: Arc ;
173+ use std :: {ffi :: CStr , sync :: Arc };
169174
170- use arrow_array :: {Array , ArrayRef };
171- use arrow_array :: builder :: StringBuilder ;
172- use arrow_array :: cast :: AsArray ;
173- use arrow_schema :: {DataType , Field };
175+ use arrow :: {
176+ array :: {Array , builder :: StringBuilder , cast :: AsArray },
177+ datatypes :: {DataType , Field , Schema },
178+ ffi :: FFI_ArrowSchema ,
179+ };
174180use daft_ext :: prelude :: * ;
181+ use daft_ext_abi :: {ArrowData , ArrowSchema };
175182
176183// ── Module ──────────────────────────────────────────────────────────
177184
@@ -181,7 +188,6 @@ use daft_ext::prelude::*;
181188struct HelloExtension ;
182189
183190impl DaftExtension for HelloExtension {
184-
185191 /// This is the extension install hook for defining functions in the session.
186192 /// Called once when the extension is loaded into a session. Register each function here.
187193 fn install (session : & mut dyn DaftSession ) {
@@ -202,25 +208,42 @@ impl DaftScalarFunction for Greet {
202208 }
203209
204210 /// Type checking.
205- /// Given the input `Field` schemas, validate types and return the output `Field`.
206- fn return_field (& self , args : & [Field ]) -> DaftResult <Field > {
211+ /// Receives input fields as C Data Interface `ArrowSchema` types.
212+ /// Use `.as_raw()` / `.into()` to convert between arrow-rs and ABI types.
213+ fn return_field (& self , args : & [ArrowSchema ]) -> DaftResult <ArrowSchema > {
207214 if args . len () != 1 {
208- return Err (DaftError :: TypeError (
209- format! (" greet: expected 1 argument, got {}" , args . len ()),
210- ));
215+ return Err (DaftError :: TypeError (format! (
216+ " greet: expected 1 argument, got {}" ,
217+ args . len ()
218+ )));
211219 }
212- if * args [0 ]. data_type () != DataType :: Utf8 && * args [0 ]. data_type () != DataType :: LargeUtf8 {
213- return Err (DaftError :: TypeError (
214- format! (" greet: expected string argument, got {:?}" , args [0 ]. data_type ()),
215- ));
220+ let ffi_schema : & FFI_ArrowSchema = unsafe { args [0 ]. as_raw () };
221+ let field = Field :: try_from (ffi_schema )
222+ . map_err (| e | DaftError :: TypeError (e . to_string ()))? ;
223+ let dt = field . data_type ();
224+ if * dt != DataType :: Utf8 && * dt != DataType :: LargeUtf8 {
225+ return Err (DaftError :: TypeError (format! (
226+ " greet: expected string argument, got {:?}" ,
227+ dt
228+ )));
216229 }
217- Ok (Field :: new (" greet" , DataType :: Utf8 , true ))
230+ let out_schema = Schema :: new (vec! [Field :: new (" greet" , DataType :: Utf8 , true )]);
231+ let ffi = FFI_ArrowSchema :: try_from (& out_schema )
232+ . map_err (| e | DaftError :: TypeError (e . to_string ()))? ;
233+ Ok (ffi . into ())
218234 }
219235
220- /// Evaluation. Receives Arrow arrays, returns an Arrow array. Operates on entire columns at once.
236+ /// Evaluation. Receives columns as C Data Interface `ArrowData` types.
237+ /// Use `.into()` to convert to/from arrow-rs FFI types.
221238 /// All data flows through Arrow arrays — no per-row Python overhead.
222- fn call (& self , args : & [ArrayRef ]) -> DaftResult <ArrayRef > {
223- let names = args [0 ]. as_string :: <i64 >();
239+ fn call (& self , args : & [ArrowData ]) -> DaftResult <ArrowData > {
240+ let data = unsafe { ArrowData :: take_arg (args , 0 ) };
241+ let ffi_array : arrow :: ffi :: FFI_ArrowArray = data . array. into ();
242+ let ffi_schema : arrow :: ffi :: FFI_ArrowSchema = data . schema. into ();
243+ let arrow_data = unsafe { arrow :: ffi :: from_ffi (ffi_array , & ffi_schema ) }
244+ . map_err (| e | DaftError :: RuntimeError (e . to_string ()))? ;
245+ let input = arrow :: array :: make_array (arrow_data );
246+ let names = input . as_string :: <i64 >();
224247 let mut builder = StringBuilder :: with_capacity (names . len (), names . len () * 16 );
225248 for i in 0 .. names . len () {
226249 if names . is_null (i ) {
@@ -229,11 +252,25 @@ impl DaftScalarFunction for Greet {
229252 builder . append_value (format! (" Hello, {}!" , names . value (i )));
230253 }
231254 }
232- Ok (Arc :: new (builder . finish ()))
255+ let output = builder . finish ();
256+ let (out_arr , out_sch ) = arrow :: ffi :: to_ffi (& output . to_data ())
257+ . map_err (| e | DaftError :: RuntimeError (e . to_string ()))? ;
258+ Ok (ArrowData {
259+ array : out_arr . into (),
260+ schema : out_sch . into (),
261+ })
233262 }
234263}
235264```
236265
266+ !!! tip "ABI pattern"
267+
268+ The `DaftScalarFunction` trait uses C Data Interface types (`ArrowSchema`, `ArrowData`)
269+ at the ABI boundary. Enable a `daft-ext-abi` feature flag (`arrow-56`, `arrow-57`, or
270+ `arrow-58`) matching your arrow-rs version to get `.into()` conversions. Use `.as_raw()`
271+ for zero-copy borrows. This decoupling means your extension is not tied to Daft's
272+ arrow-rs version.
273+
237274!!! tip "String types"
238275
239276 Daft uses `LargeUtf8` (i64 offsets) for strings internally. When downcasting string arrays,
@@ -380,17 +417,18 @@ Follow the Daft extension authoring guide at docs/extensions/index.md. Here is a
380417
381418## Rust conventions
382419
383- - Use ` daft_ext::prelude::* ` for all imports.
384- - Import ` arrow_array::Array ` for ` len() ` /` is_null() ` and ` arrow_array::cast::AsArray ` for downcasting.
420+ - Use `daft_ext::prelude::*` for all imports (provides `ArrowSchema`, `ArrowData`, errors, traits).
421+ - Add `daft-ext-abi` with a feature flag matching your arrow-rs version (`arrow-56`, `arrow-57`, or `arrow-58`) for `.into()` conversions.
422+ - Import `arrow::array::Array` for `len()`/`is_null()` and `arrow::array::cast::AsArray` for downcasting.
385423- Daft uses ` LargeUtf8 ` (i64 offsets) for strings — downcast with ` as_string::<i64>() ` , never ` i32 ` .
386424- Apply ` #[daft_extension] ` to a struct implementing ` DaftExtension ` .
387425- Register each function in ` install() ` via ` session.define_function(Arc::new(MyFn)) ` .
388426- Each function is a struct implementing ` DaftScalarFunction ` with:
389427 - ` name(&self) -> &CStr ` — use ` c"<extension_name>_<fn_name>" ` prefix to avoid collisions.
390- - ` return_field(&self, args: &[Field ]) -> DaftResult<Field > ` — validate arg count and types,
391- return ` Err(DaftError::TypeError(...)) ` for violations .
392- - ` call(&self, args: &[ArrayRef ]) -> DaftResult<ArrayRef > ` — compute over Arrow arrays,
393- propagate nulls, return ` Err(DaftError::RuntimeError(...)) ` for failures .
428+ - `return_field(&self, args: &[ArrowSchema]) -> DaftResult<ArrowSchema>` — use `.as_raw()` to
429+ borrow as arrow-rs `FFI_ArrowSchema` for type checking, then `.into()` to return the output field.
430+ - `call(&self, args: &[ArrowData]) -> DaftResult<ArrowData>` — use `ArrowData::take_arg` then
431+ `.into()` to convert to arrow-rs FFI types, compute, then `.into()` to return the result.
394432
395433## Python conventions
396434
0 commit comments