77> Please see the [prompt](#prompt) if you want help generating an extension.
88
99This document is a guide for authoring Daft native extensions in Rust.
10- Daft supports native Rust extensions by leveraging a stable C ABI and Arrow FFI. Today we support authoring native
10+ Daft supports native Rust extensions by leveraging a stable C ABI based on the
11+ [Arrow C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html).
12+ Extensions are **not coupled** to any particular Arrow library version. The ABI boundary uses
13+ plain C structs (` ArrowSchema ` , ` ArrowArray ` ) so your extension can use any arrow-rs version
14+ (or even a different Arrow implementation entirely). Today we support authoring native
1115scalar functions, but are actively working on additional native extension features.
1216
1317## Example
@@ -93,16 +97,16 @@ crate-type = ["cdylib"]
9397
9498[dependencies ]
9599daft-ext = <version>
96- arrow-array = " 57.1.0"
97- arrow-schema = " 57.1.0"
100+ arrow = { version = " 58" , features = [" ffi" ] }
98101```
99102
100- !!! tip "Arrow types "
103+ !!! tip "Arrow version freedom "
101104
102- Use `arrow-array` builders and downcasting directly for working with data.
103- The `daft-ext` prelude re-exports common types like `ArrayRef` and `Field`.
104- Import `arrow_array::Array` for the `len()` and `is_null()` methods, and
105- `arrow_array::cast::AsArray` for downcasting (e.g., `as_string`).
105+ The `daft-ext` ABI uses C Data Interface types — your extension is **not** pinned to
106+ Daft's arrow-rs version. You can use any compatible `arrow` (or `arrow-array` / `arrow-schema`)
107+ version. The `define_arrow_helpers!()` macro generates `import_array`, `export_array`,
108+ `import_field`, `export_field`, `import_schema`, and `export_schema` helpers for
109+ converting between arrow-rs types and the C ABI types.
106110
107111Then update the pyproject to use ` setuptools-rust ` as the build system.
108112
@@ -164,15 +168,17 @@ cat src/lib.rs
164168```
165169
166170``` rust
167- use std :: ffi :: CStr ;
168- use std :: sync :: Arc ;
171+ use std :: {ffi :: CStr , sync :: Arc };
169172
170- use arrow_array :: {Array , ArrayRef };
171- use arrow_array :: builder :: StringBuilder ;
172- use arrow_array :: cast :: AsArray ;
173- use arrow_schema :: { DataType , Field };
173+ use arrow :: {
174+ array :: { Array , builder :: StringBuilder , cast :: AsArray },
175+ datatypes :: { DataType , Field },
176+ };
174177use daft_ext :: prelude :: * ;
175178
179+ // Generate the import_* / export_* helpers for arrays, fields, and schemas.
180+ daft_ext :: define_arrow_helpers! ();
181+
176182// ── Module ──────────────────────────────────────────────────────────
177183
178184// #[daft_extension] generates the `daft_module_magic` C symbol that Daft's runtime looks for
@@ -181,7 +187,6 @@ use daft_ext::prelude::*;
181187struct HelloExtension ;
182188
183189impl DaftExtension for HelloExtension {
184-
185190 /// This is the extension install hook for defining functions in the session.
186191 /// Called once when the extension is loaded into a session. Register each function here.
187192 fn install (session : & mut dyn DaftSession ) {
@@ -202,25 +207,33 @@ impl DaftScalarFunction for Greet {
202207 }
203208
204209 /// Type checking.
205- /// Given the input `Field` schemas, validate types and return the output `Field`.
206- fn return_field (& self , args : & [Field ]) -> DaftResult <Field > {
210+ /// Receives input fields as C Data Interface `ArrowSchema` types.
211+ /// Use `import_field` to convert to arrow-rs types for validation,
212+ /// then `export_field` to return the output field.
213+ fn return_field (& self , args : & [ArrowSchema ]) -> DaftResult <ArrowSchema > {
207214 if args . len () != 1 {
208- return Err (DaftError :: TypeError (
209- format! (" greet: expected 1 argument, got {}" , args . len ()),
210- ));
215+ return Err (DaftError :: TypeError (format! (
216+ " greet: expected 1 argument, got {}" ,
217+ args . len ()
218+ )));
211219 }
212- if * args [0 ]. data_type () != DataType :: Utf8 && * args [0 ]. data_type () != DataType :: LargeUtf8 {
213- return Err (DaftError :: TypeError (
214- format! (" greet: expected string argument, got {:?}" , args [0 ]. data_type ()),
215- ));
220+ let field = import_field (& args [0 ])? ;
221+ let dt = field . data_type ();
222+ if * dt != DataType :: Utf8 && * dt != DataType :: LargeUtf8 {
223+ return Err (DaftError :: TypeError (format! (
224+ " greet: expected string argument, got {:?}" ,
225+ dt
226+ )));
216227 }
217- Ok (Field :: new (" greet" , DataType :: Utf8 , true ))
228+ Ok (export_field ( & Field :: new (" greet" , DataType :: Utf8 , true )) ? )
218229 }
219230
220- /// Evaluation. Receives Arrow arrays, returns an Arrow array. Operates on entire columns at once.
231+ /// Evaluation. Receives columns as C Data Interface `ArrowData` types.
232+ /// Use `import_array` / `export_array` to convert to/from arrow-rs arrays.
221233 /// All data flows through Arrow arrays — no per-row Python overhead.
222- fn call (& self , args : & [ArrayRef ]) -> DaftResult <ArrayRef > {
223- let names = args [0 ]. as_string :: <i64 >();
234+ fn call (& self , args : & [ArrowData ]) -> DaftResult <ArrowData > {
235+ let input = import_array (unsafe { ArrowData :: take_arg (args , 0 ) })? ;
236+ let names = input . as_string :: <i64 >();
224237 let mut builder = StringBuilder :: with_capacity (names . len (), names . len () * 16 );
225238 for i in 0 .. names . len () {
226239 if names . is_null (i ) {
@@ -229,11 +242,18 @@ impl DaftScalarFunction for Greet {
229242 builder . append_value (format! (" Hello, {}!" , names . value (i )));
230243 }
231244 }
232- Ok (Arc :: new ( builder . finish ()))
245+ Ok (export_array ( & builder . finish ())? )
233246 }
234247}
235248```
236249
250+ !!! tip "ABI pattern"
251+
252+ The `DaftScalarFunction` trait uses C Data Interface types (`ArrowSchema`, `ArrowData`)
253+ at the ABI boundary. Use the `import_*` / `export_*` helpers generated by `define_arrow_helpers!()` to convert
254+ to and from arrow-rs types inside your function bodies. This decoupling means your
255+ extension is not tied to Daft's arrow-rs version.
256+
237257!!! tip "String types"
238258
239259 Daft uses `LargeUtf8` (i64 offsets) for strings internally. When downcasting string arrays,
@@ -380,17 +400,18 @@ Follow the Daft extension authoring guide at docs/extensions/index.md. Here is a
380400
381401## Rust conventions
382402
383- - Use ` daft_ext::prelude::* ` for all imports.
384- - Import ` arrow_array::Array ` for ` len() ` /` is_null() ` and ` arrow_array::cast::AsArray ` for downcasting.
403+ - Use ` daft_ext::prelude::* ` for the Daft-side imports (provides ` ArrowSchema ` , ` ArrowData ` , errors, traits).
404+ - Call ` daft_ext::define_arrow_helpers!() ` to generate ` import_array ` , ` export_array ` , ` import_field ` , ` export_field ` helpers.
405+ - Import ` arrow::array::Array ` for ` len() ` /` is_null() ` and ` arrow::array::cast::AsArray ` for downcasting.
385406- Daft uses ` LargeUtf8 ` (i64 offsets) for strings — downcast with ` as_string::<i64>() ` , never ` i32 ` .
386407- Apply ` #[daft_extension] ` to a struct implementing ` DaftExtension ` .
387408- Register each function in ` install() ` via ` session.define_function(Arc::new(MyFn)) ` .
388409- Each function is a struct implementing ` DaftScalarFunction ` with:
389410 - ` name(&self) -> &CStr ` — use ` c"<extension_name>_<fn_name>" ` prefix to avoid collisions.
390- - ` return_field(&self, args: &[Field ]) -> DaftResult<Field > ` — validate arg count and types,
391- return ` Err(DaftError::TypeError(...)) ` for violations .
392- - ` call(&self, args: &[ArrayRef ]) -> DaftResult<ArrayRef > ` — compute over Arrow arrays,
393- propagate nulls, return ` Err(DaftError::RuntimeError(...)) ` for failures .
411+ - ` return_field(&self, args: &[ArrowSchema ]) -> DaftResult<ArrowSchema > ` — use ` import_field ` to
412+ convert inputs, validate types, then ` export_field ` to return the output field .
413+ - ` call(&self, args: &[ArrowData ]) -> DaftResult<ArrowData > ` — use ` ArrowData::take_arg ` + ` import_array `
414+ to convert inputs to arrow-rs arrays, compute, then ` export_array ` to return the result .
394415
395416## Python conventions
396417
0 commit comments